from __future__ import absolute_import, division, unicode_literals

import os
import sys
import codecs
import glob
import xml.sax.handler

base_path = os.path.split(__file__)[0]

test_dir = os.path.join(base_path, 'testdata')
# Put the directory two levels above this file at the front of sys.path
# (presumably so the html5lib import below picks up the in-tree package).
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
                                                os.path.pardir,
                                                os.path.pardir)))

from html5lib import treebuilders
del base_path

# Build a dict of available trees
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}

# Try whatever etree implementations are available from a list that are
# "supposed" to work
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
    except ImportError:
        pass

try:
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
    try:
        import cElementTree
        treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
    except ImportError:
        pass

try:
    import lxml.etree as lxml  # flake8: noqa
except ImportError:
    pass
else:
    treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")


def get_data_files(subdirectory, files='*.dat'):
    """Return the paths under ``test_dir/subdirectory`` matching the glob
    pattern ``files``."""
    return glob.glob(os.path.join(test_dir, subdirectory, files))


class DefaultDict(dict):
    """A dict whose ``d[key]`` lookup returns a fixed default for missing
    keys instead of raising KeyError. The default is not stored in the dict."""

    def __init__(self, default, *args, **kwargs):
        self.default = default
        dict.__init__(self, *args, **kwargs)

    def __getitem__(self, key):
        return dict.get(self, key, self.default)


class TestData(object):
    """Iterate over the tests stored in a sectioned data file.

    A line starting with ``#`` opens a section named by the rest of that
    line; the lines that follow are that section's content. A section whose
    name equals ``newTestHeading`` starts a new test.
    """

    def __init__(self, filename, newTestHeading="data", encoding="utf8"):
        """Open ``filename``.

        With ``encoding=None`` the file is opened in binary mode and section
        contents are bytes; otherwise it is decoded with ``encoding``.
        """
        if encoding is None:
            self.f = open(filename, mode="rb")
        else:
            self.f = codecs.open(filename, encoding=encoding)
        self.encoding = encoding
        self.newTestHeading = newTestHeading

    def __del__(self):
        # If opening the file raised in __init__, self.f was never assigned;
        # closing unconditionally would raise AttributeError in the finalizer.
        f = getattr(self, "f", None)
        if f is not None:
            f.close()

    def __iter__(self):
        """Yield one DefaultDict per test, mapping section name to content.

        Sections that a test does not contain read as None.
        """
        data = DefaultDict(None)
        key = None
        for line in self.f:
            heading = self.isSectionHeading(line)
            if heading:
                if data and heading == self.newTestHeading:
                    # Remove trailing newline of the section collected last;
                    # normaliseOutput strips one more from every section.
                    data[key] = data[key][:-1]
                    yield self.normaliseOutput(data)
                    data = DefaultDict(None)
                key = heading
                # Empty accumulator of the same type as the lines being read
                data[key] = "" if self.encoding else b""
            elif key is not None:
                # Lines before the first heading are ignored
                data[key] += line
        if data:
            yield self.normaliseOutput(data)

    def isSectionHeading(self, line):
        """If the current line is a test section heading return the heading,
        otherwise return False"""
        if line.startswith("#" if self.encoding else b"#"):
            return line[1:].strip()
        else:
            return False

    def normaliseOutput(self, data):
        """Strip one trailing newline from every value of ``data`` in place
        and return ``data``."""
        # Only existing keys are reassigned, so mutating during iteration
        # does not change the dict's size.
        for key, value in data.items():
            if value.endswith("\n" if self.encoding else b"\n"):
                data[key] = value[:-1]
        return data


def convert(stripChars):
    """Return a function that removes the first ``stripChars`` characters
    from every line of its input that starts with ``|``."""
    def convertData(data):
        """convert the output of str(document) to the format used in the
        testcases"""
        return "\n".join(line[stripChars:] if line.startswith("|") else line
                         for line in data.split("\n"))
    return convertData

convertExpected = convert(2)


def errorMessage(input, expected, actual):
    """Build a failure message showing the repr of the input, the expected
    output and the actual output.

    On Python 2 the message is returned ASCII-encoded with non-ASCII
    characters backslash-escaped; on other versions it is returned as text.
    """
    msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
           (repr(input), repr(expected), repr(actual)))
    # Index access works whether or not version_info is a named tuple.
    if sys.version_info[0] == 2:
        msg = msg.encode("ascii", "backslashreplace")
    return msg


class TracingSaxHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that records each callback it receives, in
    order, in ``self.visited``."""

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.visited = []

    def startDocument(self):
        self.visited.append('startDocument')

    def endDocument(self):
        self.visited.append('endDocument')

    def startPrefixMapping(self, prefix, uri):
        # These are ignored as their order is not guaranteed
        pass

    def endPrefixMapping(self, prefix):
        # These are ignored as their order is not guaranteed
        pass

    def startElement(self, name, attrs):
        # attrs is recorded as received (not copied into a dict)
        self.visited.append(('startElement', name, attrs))

    def endElement(self, name):
        self.visited.append(('endElement', name))

    def startElementNS(self, name, qname, attrs):
        # attrs is copied into a plain dict before being recorded
        self.visited.append(('startElementNS', name, qname, dict(attrs)))

    def endElementNS(self, name, qname):
        self.visited.append(('endElementNS', name, qname))

    def characters(self, content):
        self.visited.append(('characters', content))

    def ignorableWhitespace(self, whitespace):
        self.visited.append(('ignorableWhitespace', whitespace))

    def processingInstruction(self, target, data):
        self.visited.append(('processingInstruction', target, data))

    def skippedEntity(self, name):
        self.visited.append(('skippedEntity', name))