Lines Matching refs:self
37 def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, argument
40 self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
41 self.parser = parser
44 self.lowercaseElementName = lowercaseElementName
45 self.lowercaseAttrName = lowercaseAttrName
48 self.escapeFlag = False
49 self.lastFourChars = []
50 self.state = self.dataState
51 self.escape = False
54 self.currentToken = None
55 super(HTMLTokenizer, self).__init__()
57 def __iter__(self): argument
64 self.tokenQueue = deque([])
67 while self.state():
68 while self.stream.errors:
69 yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
70 while self.tokenQueue:
71 yield self.tokenQueue.popleft()
73 def consumeNumberEntity(self, isHex): argument
89 c = self.stream.char()
92 c = self.stream.char()
100 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
106 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
124 self.tokenQueue.append({"type": tokenTypes["ParseError"],
139 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
141 self.stream.unget(c)
145 def consumeEntity(self, allowedChar=None, fromAttribute=False): argument
149 charStack = [self.stream.char()]
152 self.stream.unget(charStack[0])
157 charStack.append(self.stream.char())
160 charStack.append(self.stream.char())
166 self.stream.unget(charStack[-1])
167 output = self.consumeNumberEntity(hex)
170 self.tokenQueue.append({"type": tokenTypes["ParseError"],
172 self.stream.unget(charStack.pop())
184 charStack.append(self.stream.char())
198 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
204 self.stream.unget(charStack.pop())
208 self.stream.unget(charStack.pop())
211 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
213 self.stream.unget(charStack.pop())
217 self.currentToken["data"][-1][1] += output
223 self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
225 def processEntityInAttribute(self, allowedChar): argument
228 self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
230 def emitCurrentToken(self): argument
235 token = self.currentToken
238 if self.lowercaseElementName:
242 self.tokenQueue.append({"type": tokenTypes["ParseError"],
245 self.tokenQueue.append({"type": tokenTypes["ParseError"],
247 self.tokenQueue.append(token)
248 self.state = self.dataState
251 def dataState(self): argument
252 data = self.stream.char()
254 self.state = self.entityDataState
256 self.state = self.tagOpenState
258 self.tokenQueue.append({"type": tokenTypes["ParseError"],
260 self.tokenQueue.append({"type": tokenTypes["Characters"],
269 self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
270 data + self.stream.charsUntil(spaceCharacters, True)})
275 chars = self.stream.charsUntil(("&", "<", "\u0000"))
276 self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
280 def entityDataState(self): argument
281 self.consumeEntity()
282 self.state = self.dataState
285 def rcdataState(self): argument
286 data = self.stream.char()
288 self.state = self.characterReferenceInRcdata
290 self.state = self.rcdataLessThanSignState
295 self.tokenQueue.append({"type": tokenTypes["ParseError"],
297 self.tokenQueue.append({"type": tokenTypes["Characters"],
303 self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
304 data + self.stream.charsUntil(spaceCharacters, True)})
309 chars = self.stream.charsUntil(("&", "<", "\u0000"))
310 self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
314 def characterReferenceInRcdata(self): argument
315 self.consumeEntity()
316 self.state = self.rcdataState
319 def rawtextState(self): argument
320 data = self.stream.char()
322 self.state = self.rawtextLessThanSignState
324 self.tokenQueue.append({"type": tokenTypes["ParseError"],
326 self.tokenQueue.append({"type": tokenTypes["Characters"],
332 chars = self.stream.charsUntil(("<", "\u0000"))
333 self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
337 def scriptDataState(self): argument
338 data = self.stream.char()
340 self.state = self.scriptDataLessThanSignState
342 self.tokenQueue.append({"type": tokenTypes["ParseError"],
344 self.tokenQueue.append({"type": tokenTypes["Characters"],
350 chars = self.stream.charsUntil(("<", "\u0000"))
351 self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
355 def plaintextState(self): argument
356 data = self.stream.char()
361 self.tokenQueue.append({"type": tokenTypes["ParseError"],
363 self.tokenQueue.append({"type": tokenTypes["Characters"],
366 self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
367 data + self.stream.charsUntil("\u0000")})
370 def tagOpenState(self): argument
371 data = self.stream.char()
373 self.state = self.markupDeclarationOpenState
375 self.state = self.closeTagOpenState
377 self.currentToken = {"type": tokenTypes["StartTag"],
381 self.state = self.tagNameState
385 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
387 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
388 self.state = self.dataState
392 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
394 self.stream.unget(data)
395 self.state = self.bogusCommentState
398 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
400 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
401 self.stream.unget(data)
402 self.state = self.dataState
405 def closeTagOpenState(self): argument
406 data = self.stream.char()
408 self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
410 self.state = self.tagNameState
412 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
414 self.state = self.dataState
416 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
418 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
419 self.state = self.dataState
422 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
425 self.stream.unget(data)
426 self.state = self.bogusCommentState
429 def tagNameState(self): argument
430 data = self.stream.char()
432 self.state = self.beforeAttributeNameState
434 self.emitCurrentToken()
436 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
438 self.state = self.dataState
440 self.state = self.selfClosingStartTagState
442 self.tokenQueue.append({"type": tokenTypes["ParseError"],
444 self.currentToken["name"] += "\uFFFD"
446 self.currentToken["name"] += data
451 def rcdataLessThanSignState(self): argument
452 data = self.stream.char()
454 self.temporaryBuffer = ""
455 self.state = self.rcdataEndTagOpenState
457 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
458 self.stream.unget(data)
459 self.state = self.rcdataState
462 def rcdataEndTagOpenState(self): argument
463 data = self.stream.char()
465 self.temporaryBuffer += data
466 self.state = self.rcdataEndTagNameState
468 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
469 self.stream.unget(data)
470 self.state = self.rcdataState
473 def rcdataEndTagNameState(self): argument
474 …appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lowe…
475 data = self.stream.char()
477 self.currentToken = {"type": tokenTypes["EndTag"],
478 "name": self.temporaryBuffer,
480 self.state = self.beforeAttributeNameState
482 self.currentToken = {"type": tokenTypes["EndTag"],
483 "name": self.temporaryBuffer,
485 self.state = self.selfClosingStartTagState
487 self.currentToken = {"type": tokenTypes["EndTag"],
488 "name": self.temporaryBuffer,
490 self.emitCurrentToken()
491 self.state = self.dataState
493 self.temporaryBuffer += data
495 self.tokenQueue.append({"type": tokenTypes["Characters"],
496 "data": "</" + self.temporaryBuffer})
497 self.stream.unget(data)
498 self.state = self.rcdataState
501 def rawtextLessThanSignState(self): argument
502 data = self.stream.char()
504 self.temporaryBuffer = ""
505 self.state = self.rawtextEndTagOpenState
507 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
508 self.stream.unget(data)
509 self.state = self.rawtextState
512 def rawtextEndTagOpenState(self): argument
513 data = self.stream.char()
515 self.temporaryBuffer += data
516 self.state = self.rawtextEndTagNameState
518 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
519 self.stream.unget(data)
520 self.state = self.rawtextState
523 def rawtextEndTagNameState(self): argument
524 …appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lowe…
525 data = self.stream.char()
527 self.currentToken = {"type": tokenTypes["EndTag"],
528 "name": self.temporaryBuffer,
530 self.state = self.beforeAttributeNameState
532 self.currentToken = {"type": tokenTypes["EndTag"],
533 "name": self.temporaryBuffer,
535 self.state = self.selfClosingStartTagState
537 self.currentToken = {"type": tokenTypes["EndTag"],
538 "name": self.temporaryBuffer,
540 self.emitCurrentToken()
541 self.state = self.dataState
543 self.temporaryBuffer += data
545 self.tokenQueue.append({"type": tokenTypes["Characters"],
546 "data": "</" + self.temporaryBuffer})
547 self.stream.unget(data)
548 self.state = self.rawtextState
551 def scriptDataLessThanSignState(self): argument
552 data = self.stream.char()
554 self.temporaryBuffer = ""
555 self.state = self.scriptDataEndTagOpenState
557 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"})
558 self.state = self.scriptDataEscapeStartState
560 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
561 self.stream.unget(data)
562 self.state = self.scriptDataState
565 def scriptDataEndTagOpenState(self): argument
566 data = self.stream.char()
568 self.temporaryBuffer += data
569 self.state = self.scriptDataEndTagNameState
571 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
572 self.stream.unget(data)
573 self.state = self.scriptDataState
576 def scriptDataEndTagNameState(self): argument
577 …appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lowe…
578 data = self.stream.char()
580 self.currentToken = {"type": tokenTypes["EndTag"],
581 "name": self.temporaryBuffer,
583 self.state = self.beforeAttributeNameState
585 self.currentToken = {"type": tokenTypes["EndTag"],
586 "name": self.temporaryBuffer,
588 self.state = self.selfClosingStartTagState
590 self.currentToken = {"type": tokenTypes["EndTag"],
591 "name": self.temporaryBuffer,
593 self.emitCurrentToken()
594 self.state = self.dataState
596 self.temporaryBuffer += data
598 self.tokenQueue.append({"type": tokenTypes["Characters"],
599 "data": "</" + self.temporaryBuffer})
600 self.stream.unget(data)
601 self.state = self.scriptDataState
604 def scriptDataEscapeStartState(self): argument
605 data = self.stream.char()
607 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
608 self.state = self.scriptDataEscapeStartDashState
610 self.stream.unget(data)
611 self.state = self.scriptDataState
614 def scriptDataEscapeStartDashState(self): argument
615 data = self.stream.char()
617 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
618 self.state = self.scriptDataEscapedDashDashState
620 self.stream.unget(data)
621 self.state = self.scriptDataState
624 def scriptDataEscapedState(self): argument
625 data = self.stream.char()
627 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
628 self.state = self.scriptDataEscapedDashState
630 self.state = self.scriptDataEscapedLessThanSignState
632 self.tokenQueue.append({"type": tokenTypes["ParseError"],
634 self.tokenQueue.append({"type": tokenTypes["Characters"],
637 self.state = self.dataState
639 chars = self.stream.charsUntil(("<", "-", "\u0000"))
640 self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
644 def scriptDataEscapedDashState(self): argument
645 data = self.stream.char()
647 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
648 self.state = self.scriptDataEscapedDashDashState
650 self.state = self.scriptDataEscapedLessThanSignState
652 self.tokenQueue.append({"type": tokenTypes["ParseError"],
654 self.tokenQueue.append({"type": tokenTypes["Characters"],
656 self.state = self.scriptDataEscapedState
658 self.state = self.dataState
660 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
661 self.state = self.scriptDataEscapedState
664 def scriptDataEscapedDashDashState(self): argument
665 data = self.stream.char()
667 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
669 self.state = self.scriptDataEscapedLessThanSignState
671 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
672 self.state = self.scriptDataState
674 self.tokenQueue.append({"type": tokenTypes["ParseError"],
676 self.tokenQueue.append({"type": tokenTypes["Characters"],
678 self.state = self.scriptDataEscapedState
680 self.state = self.dataState
682 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
683 self.state = self.scriptDataEscapedState
686 def scriptDataEscapedLessThanSignState(self): argument
687 data = self.stream.char()
689 self.temporaryBuffer = ""
690 self.state = self.scriptDataEscapedEndTagOpenState
692 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data})
693 self.temporaryBuffer = data
694 self.state = self.scriptDataDoubleEscapeStartState
696 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
697 self.stream.unget(data)
698 self.state = self.scriptDataEscapedState
701 def scriptDataEscapedEndTagOpenState(self): argument
702 data = self.stream.char()
704 self.temporaryBuffer = data
705 self.state = self.scriptDataEscapedEndTagNameState
707 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
708 self.stream.unget(data)
709 self.state = self.scriptDataEscapedState
712 def scriptDataEscapedEndTagNameState(self): argument
713 …appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lowe…
714 data = self.stream.char()
716 self.currentToken = {"type": tokenTypes["EndTag"],
717 "name": self.temporaryBuffer,
719 self.state = self.beforeAttributeNameState
721 self.currentToken = {"type": tokenTypes["EndTag"],
722 "name": self.temporaryBuffer,
724 self.state = self.selfClosingStartTagState
726 self.currentToken = {"type": tokenTypes["EndTag"],
727 "name": self.temporaryBuffer,
729 self.emitCurrentToken()
730 self.state = self.dataState
732 self.temporaryBuffer += data
734 self.tokenQueue.append({"type": tokenTypes["Characters"],
735 "data": "</" + self.temporaryBuffer})
736 self.stream.unget(data)
737 self.state = self.scriptDataEscapedState
740 def scriptDataDoubleEscapeStartState(self): argument
741 data = self.stream.char()
743 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
744 if self.temporaryBuffer.lower() == "script":
745 self.state = self.scriptDataDoubleEscapedState
747 self.state = self.scriptDataEscapedState
749 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
750 self.temporaryBuffer += data
752 self.stream.unget(data)
753 self.state = self.scriptDataEscapedState
756 def scriptDataDoubleEscapedState(self): argument
757 data = self.stream.char()
759 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
760 self.state = self.scriptDataDoubleEscapedDashState
762 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
763 self.state = self.scriptDataDoubleEscapedLessThanSignState
765 self.tokenQueue.append({"type": tokenTypes["ParseError"],
767 self.tokenQueue.append({"type": tokenTypes["Characters"],
770 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
772 self.state = self.dataState
774 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
777 def scriptDataDoubleEscapedDashState(self): argument
778 data = self.stream.char()
780 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
781 self.state = self.scriptDataDoubleEscapedDashDashState
783 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
784 self.state = self.scriptDataDoubleEscapedLessThanSignState
786 self.tokenQueue.append({"type": tokenTypes["ParseError"],
788 self.tokenQueue.append({"type": tokenTypes["Characters"],
790 self.state = self.scriptDataDoubleEscapedState
792 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
794 self.state = self.dataState
796 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
797 self.state = self.scriptDataDoubleEscapedState
800 def scriptDataDoubleEscapedDashDashState(self): argument
801 data = self.stream.char()
803 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
805 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
806 self.state = self.scriptDataDoubleEscapedLessThanSignState
808 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
809 self.state = self.scriptDataState
811 self.tokenQueue.append({"type": tokenTypes["ParseError"],
813 self.tokenQueue.append({"type": tokenTypes["Characters"],
815 self.state = self.scriptDataDoubleEscapedState
817 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
819 self.state = self.dataState
821 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
822 self.state = self.scriptDataDoubleEscapedState
825 def scriptDataDoubleEscapedLessThanSignState(self): argument
826 data = self.stream.char()
828 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"})
829 self.temporaryBuffer = ""
830 self.state = self.scriptDataDoubleEscapeEndState
832 self.stream.unget(data)
833 self.state = self.scriptDataDoubleEscapedState
836 def scriptDataDoubleEscapeEndState(self): argument
837 data = self.stream.char()
839 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
840 if self.temporaryBuffer.lower() == "script":
841 self.state = self.scriptDataEscapedState
843 self.state = self.scriptDataDoubleEscapedState
845 self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
846 self.temporaryBuffer += data
848 self.stream.unget(data)
849 self.state = self.scriptDataDoubleEscapedState
852 def beforeAttributeNameState(self): argument
853 data = self.stream.char()
855 self.stream.charsUntil(spaceCharacters, True)
857 self.currentToken["data"].append([data, ""])
858 self.state = self.attributeNameState
860 self.emitCurrentToken()
862 self.state = self.selfClosingStartTagState
864 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
866 self.currentToken["data"].append([data, ""])
867 self.state = self.attributeNameState
869 self.tokenQueue.append({"type": tokenTypes["ParseError"],
871 self.currentToken["data"].append(["\uFFFD", ""])
872 self.state = self.attributeNameState
874 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
876 self.state = self.dataState
878 self.currentToken["data"].append([data, ""])
879 self.state = self.attributeNameState
882 def attributeNameState(self): argument
883 data = self.stream.char()
887 self.state = self.beforeAttributeValueState
889 self.currentToken["data"][-1][0] += data +\
890 self.stream.charsUntil(asciiLetters, True)
898 self.state = self.afterAttributeNameState
900 self.state = self.selfClosingStartTagState
902 self.tokenQueue.append({"type": tokenTypes["ParseError"],
904 self.currentToken["data"][-1][0] += "\uFFFD"
907 self.tokenQueue.append({"type": tokenTypes["ParseError"],
910 self.currentToken["data"][-1][0] += data
913 self.tokenQueue.append({"type": tokenTypes["ParseError"],
915 self.state = self.dataState
917 self.currentToken["data"][-1][0] += data
924 if self.lowercaseAttrName:
925 self.currentToken["data"][-1][0] = (
926 self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
927 for name, value in self.currentToken["data"][:-1]:
928 if self.currentToken["data"][-1][0] == name:
929 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
934 self.emitCurrentToken()
937 def afterAttributeNameState(self): argument
938 data = self.stream.char()
940 self.stream.charsUntil(spaceCharacters, True)
942 self.state = self.beforeAttributeValueState
944 self.emitCurrentToken()
946 self.currentToken["data"].append([data, ""])
947 self.state = self.attributeNameState
949 self.state = self.selfClosingStartTagState
951 self.tokenQueue.append({"type": tokenTypes["ParseError"],
953 self.currentToken["data"].append(["\uFFFD", ""])
954 self.state = self.attributeNameState
956 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
958 self.currentToken["data"].append([data, ""])
959 self.state = self.attributeNameState
961 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
963 self.state = self.dataState
965 self.currentToken["data"].append([data, ""])
966 self.state = self.attributeNameState
969 def beforeAttributeValueState(self): argument
970 data = self.stream.char()
972 self.stream.charsUntil(spaceCharacters, True)
974 self.state = self.attributeValueDoubleQuotedState
976 self.state = self.attributeValueUnQuotedState
977 self.stream.unget(data)
979 self.state = self.attributeValueSingleQuotedState
981 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
983 self.emitCurrentToken()
985 self.tokenQueue.append({"type": tokenTypes["ParseError"],
987 self.currentToken["data"][-1][1] += "\uFFFD"
988 self.state = self.attributeValueUnQuotedState
990 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
992 self.currentToken["data"][-1][1] += data
993 self.state = self.attributeValueUnQuotedState
995 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
997 self.state = self.dataState
999 self.currentToken["data"][-1][1] += data
1000 self.state = self.attributeValueUnQuotedState
1003 def attributeValueDoubleQuotedState(self): argument
1004 data = self.stream.char()
1006 self.state = self.afterAttributeValueState
1008 self.processEntityInAttribute('"')
1010 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1012 self.currentToken["data"][-1][1] += "\uFFFD"
1014 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1016 self.state = self.dataState
1018 self.currentToken["data"][-1][1] += data +\
1019 self.stream.charsUntil(("\"", "&", "\u0000"))
1022 def attributeValueSingleQuotedState(self): argument
1023 data = self.stream.char()
1025 self.state = self.afterAttributeValueState
1027 self.processEntityInAttribute("'")
1029 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1031 self.currentToken["data"][-1][1] += "\uFFFD"
1033 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1035 self.state = self.dataState
1037 self.currentToken["data"][-1][1] += data +\
1038 self.stream.charsUntil(("'", "&", "\u0000"))
1041 def attributeValueUnQuotedState(self): argument
1042 data = self.stream.char()
1044 self.state = self.beforeAttributeNameState
1046 self.processEntityInAttribute(">")
1048 self.emitCurrentToken()
1050 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1052 self.currentToken["data"][-1][1] += data
1054 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1056 self.currentToken["data"][-1][1] += "\uFFFD"
1058 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1060 self.state = self.dataState
1062 self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
1066 def afterAttributeValueState(self): argument
1067 data = self.stream.char()
1069 self.state = self.beforeAttributeNameState
1071 self.emitCurrentToken()
1073 self.state = self.selfClosingStartTagState
1075 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1077 self.stream.unget(data)
1078 self.state = self.dataState
1080 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1082 self.stream.unget(data)
1083 self.state = self.beforeAttributeNameState
1086 def selfClosingStartTagState(self): argument
1087 data = self.stream.char()
1089 self.currentToken["selfClosing"] = True
1090 self.emitCurrentToken()
1092 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1095 self.stream.unget(data)
1096 self.state = self.dataState
1098 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1100 self.stream.unget(data)
1101 self.state = self.beforeAttributeNameState
1104 def bogusCommentState(self): argument
1108 data = self.stream.charsUntil(">")
1110 self.tokenQueue.append(
1115 self.stream.char()
1116 self.state = self.dataState
1119 def markupDeclarationOpenState(self): argument
1120 charStack = [self.stream.char()]
1122 charStack.append(self.stream.char())
1124 self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
1125 self.state = self.commentStartState
1131 charStack.append(self.stream.char())
1136 self.currentToken = {"type": tokenTypes["Doctype"],
1140 self.state = self.doctypeState
1143 self.parser is not None and
1144 self.parser.tree.openElements and
1145 self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
1148 charStack.append(self.stream.char())
1153 self.state = self.cdataSectionState
1156 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1160 self.stream.unget(charStack.pop())
1161 self.state = self.bogusCommentState
1164 def commentStartState(self): argument
1165 data = self.stream.char()
1167 self.state = self.commentStartDashState
1169 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1171 self.currentToken["data"] += "\uFFFD"
1173 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1175 self.tokenQueue.append(self.currentToken)
1176 self.state = self.dataState
1178 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1180 self.tokenQueue.append(self.currentToken)
1181 self.state = self.dataState
1183 self.currentToken["data"] += data
1184 self.state = self.commentState
1187 def commentStartDashState(self): argument
1188 data = self.stream.char()
1190 self.state = self.commentEndState
1192 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1194 self.currentToken["data"] += "-\uFFFD"
1196 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1198 self.tokenQueue.append(self.currentToken)
1199 self.state = self.dataState
1201 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1203 self.tokenQueue.append(self.currentToken)
1204 self.state = self.dataState
1206 self.currentToken["data"] += "-" + data
1207 self.state = self.commentState
1210 def commentState(self): argument
1211 data = self.stream.char()
1213 self.state = self.commentEndDashState
1215 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1217 self.currentToken["data"] += "\uFFFD"
1219 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1221 self.tokenQueue.append(self.currentToken)
1222 self.state = self.dataState
1224 self.currentToken["data"] += data + \
1225 self.stream.charsUntil(("-", "\u0000"))
1228 def commentEndDashState(self): argument
1229 data = self.stream.char()
1231 self.state = self.commentEndState
1233 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1235 self.currentToken["data"] += "-\uFFFD"
1236 self.state = self.commentState
1238 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1240 self.tokenQueue.append(self.currentToken)
1241 self.state = self.dataState
1243 self.currentToken["data"] += "-" + data
1244 self.state = self.commentState
1247 def commentEndState(self): argument
1248 data = self.stream.char()
1250 self.tokenQueue.append(self.currentToken)
1251 self.state = self.dataState
1253 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1255 self.currentToken["data"] += "--\uFFFD"
1256 self.state = self.commentState
1258 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1260 self.state = self.commentEndBangState
1262 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1264 self.currentToken["data"] += data
1266 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1268 self.tokenQueue.append(self.currentToken)
1269 self.state = self.dataState
1272 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1274 self.currentToken["data"] += "--" + data
1275 self.state = self.commentState
1278 def commentEndBangState(self): argument
1279 data = self.stream.char()
1281 self.tokenQueue.append(self.currentToken)
1282 self.state = self.dataState
1284 self.currentToken["data"] += "--!"
1285 self.state = self.commentEndDashState
1287 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1289 self.currentToken["data"] += "--!\uFFFD"
1290 self.state = self.commentState
1292 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1294 self.tokenQueue.append(self.currentToken)
1295 self.state = self.dataState
1297 self.currentToken["data"] += "--!" + data
1298 self.state = self.commentState
1301 def doctypeState(self): argument
1302 data = self.stream.char()
1304 self.state = self.beforeDoctypeNameState
1306 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1308 self.currentToken["correct"] = False
1309 self.tokenQueue.append(self.currentToken)
1310 self.state = self.dataState
1312 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1314 self.stream.unget(data)
1315 self.state = self.beforeDoctypeNameState
1318 def beforeDoctypeNameState(self): argument
1319 data = self.stream.char()
1323 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1325 self.currentToken["correct"] = False
1326 self.tokenQueue.append(self.currentToken)
1327 self.state = self.dataState
1329 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1331 self.currentToken["name"] = "\uFFFD"
1332 self.state = self.doctypeNameState
1334 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1336 self.currentToken["correct"] = False
1337 self.tokenQueue.append(self.currentToken)
1338 self.state = self.dataState
1340 self.currentToken["name"] = data
1341 self.state = self.doctypeNameState
1344 def doctypeNameState(self): argument
1345 data = self.stream.char()
1347 self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
1348 self.state = self.afterDoctypeNameState
1350 self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
1351 self.tokenQueue.append(self.currentToken)
1352 self.state = self.dataState
1354 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1356 self.currentToken["name"] += "\uFFFD"
1357 self.state = self.doctypeNameState
1359 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1361 self.currentToken["correct"] = False
1362 self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
1363 self.tokenQueue.append(self.currentToken)
1364 self.state = self.dataState
1366 self.currentToken["name"] += data
1369 def afterDoctypeNameState(self): argument
1370 data = self.stream.char()
1374 self.tokenQueue.append(self.currentToken)
1375 self.state = self.dataState
1377 self.currentToken["correct"] = False
1378 self.stream.unget(data)
1379 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1381 self.tokenQueue.append(self.currentToken)
1382 self.state = self.dataState
1388 data = self.stream.char()
1393 self.state = self.afterDoctypePublicKeywordState
1399 data = self.stream.char()
1404 self.state = self.afterDoctypeSystemKeywordState
1411 self.stream.unget(data)
1412 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1415 self.currentToken["correct"] = False
1416 self.state = self.bogusDoctypeState
1420 def afterDoctypePublicKeywordState(self): argument
1421 data = self.stream.char()
1423 self.state = self.beforeDoctypePublicIdentifierState
1425 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1427 self.stream.unget(data)
1428 self.state = self.beforeDoctypePublicIdentifierState
1430 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1432 self.currentToken["correct"] = False
1433 self.tokenQueue.append(self.currentToken)
1434 self.state = self.dataState
1436 self.stream.unget(data)
1437 self.state = self.beforeDoctypePublicIdentifierState
1440 def beforeDoctypePublicIdentifierState(self): argument
1441 data = self.stream.char()
1445 self.currentToken["publicId"] = ""
1446 self.state = self.doctypePublicIdentifierDoubleQuotedState
1448 self.currentToken["publicId"] = ""
1449 self.state = self.doctypePublicIdentifierSingleQuotedState
1451 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1453 self.currentToken["correct"] = False
1454 self.tokenQueue.append(self.currentToken)
1455 self.state = self.dataState
1457 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1459 self.currentToken["correct"] = False
1460 self.tokenQueue.append(self.currentToken)
1461 self.state = self.dataState
1463 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1465 self.currentToken["correct"] = False
1466 self.state = self.bogusDoctypeState
1469 def doctypePublicIdentifierDoubleQuotedState(self): argument
1470 data = self.stream.char()
1472 self.state = self.afterDoctypePublicIdentifierState
1474 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1476 self.currentToken["publicId"] += "\uFFFD"
1478 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1480 self.currentToken["correct"] = False
1481 self.tokenQueue.append(self.currentToken)
1482 self.state = self.dataState
1484 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1486 self.currentToken["correct"] = False
1487 self.tokenQueue.append(self.currentToken)
1488 self.state = self.dataState
1490 self.currentToken["publicId"] += data
1493 def doctypePublicIdentifierSingleQuotedState(self): argument
1494 data = self.stream.char()
1496 self.state = self.afterDoctypePublicIdentifierState
1498 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1500 self.currentToken["publicId"] += "\uFFFD"
1502 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1504 self.currentToken["correct"] = False
1505 self.tokenQueue.append(self.currentToken)
1506 self.state = self.dataState
1508 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1510 self.currentToken["correct"] = False
1511 self.tokenQueue.append(self.currentToken)
1512 self.state = self.dataState
1514 self.currentToken["publicId"] += data
1517 def afterDoctypePublicIdentifierState(self): argument
1518 data = self.stream.char()
1520 self.state = self.betweenDoctypePublicAndSystemIdentifiersState
1522 self.tokenQueue.append(self.currentToken)
1523 self.state = self.dataState
1525 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1527 self.currentToken["systemId"] = ""
1528 self.state = self.doctypeSystemIdentifierDoubleQuotedState
1530 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1532 self.currentToken["systemId"] = ""
1533 self.state = self.doctypeSystemIdentifierSingleQuotedState
1535 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1537 self.currentToken["correct"] = False
1538 self.tokenQueue.append(self.currentToken)
1539 self.state = self.dataState
1541 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1543 self.currentToken["correct"] = False
1544 self.state = self.bogusDoctypeState
1547 def betweenDoctypePublicAndSystemIdentifiersState(self): argument
1548 data = self.stream.char()
1552 self.tokenQueue.append(self.currentToken)
1553 self.state = self.dataState
1555 self.currentToken["systemId"] = ""
1556 self.state = self.doctypeSystemIdentifierDoubleQuotedState
1558 self.currentToken["systemId"] = ""
1559 self.state = self.doctypeSystemIdentifierSingleQuotedState
1561 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1563 self.currentToken["correct"] = False
1564 self.tokenQueue.append(self.currentToken)
1565 self.state = self.dataState
1567 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1569 self.currentToken["correct"] = False
1570 self.state = self.bogusDoctypeState
1573 def afterDoctypeSystemKeywordState(self): argument
1574 data = self.stream.char()
1576 self.state = self.beforeDoctypeSystemIdentifierState
1578 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1580 self.stream.unget(data)
1581 self.state = self.beforeDoctypeSystemIdentifierState
1583 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1585 self.currentToken["correct"] = False
1586 self.tokenQueue.append(self.currentToken)
1587 self.state = self.dataState
1589 self.stream.unget(data)
1590 self.state = self.beforeDoctypeSystemIdentifierState
1593 def beforeDoctypeSystemIdentifierState(self): argument
1594 data = self.stream.char()
1598 self.currentToken["systemId"] = ""
1599 self.state = self.doctypeSystemIdentifierDoubleQuotedState
1601 self.currentToken["systemId"] = ""
1602 self.state = self.doctypeSystemIdentifierSingleQuotedState
1604 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1606 self.currentToken["correct"] = False
1607 self.tokenQueue.append(self.currentToken)
1608 self.state = self.dataState
1610 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1612 self.currentToken["correct"] = False
1613 self.tokenQueue.append(self.currentToken)
1614 self.state = self.dataState
1616 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1618 self.currentToken["correct"] = False
1619 self.state = self.bogusDoctypeState
1622 def doctypeSystemIdentifierDoubleQuotedState(self): argument
1623 data = self.stream.char()
1625 self.state = self.afterDoctypeSystemIdentifierState
1627 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1629 self.currentToken["systemId"] += "\uFFFD"
1631 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1633 self.currentToken["correct"] = False
1634 self.tokenQueue.append(self.currentToken)
1635 self.state = self.dataState
1637 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1639 self.currentToken["correct"] = False
1640 self.tokenQueue.append(self.currentToken)
1641 self.state = self.dataState
1643 self.currentToken["systemId"] += data
1646 def doctypeSystemIdentifierSingleQuotedState(self): argument
1647 data = self.stream.char()
1649 self.state = self.afterDoctypeSystemIdentifierState
1651 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1653 self.currentToken["systemId"] += "\uFFFD"
1655 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1657 self.currentToken["correct"] = False
1658 self.tokenQueue.append(self.currentToken)
1659 self.state = self.dataState
1661 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1663 self.currentToken["correct"] = False
1664 self.tokenQueue.append(self.currentToken)
1665 self.state = self.dataState
1667 self.currentToken["systemId"] += data
1670 def afterDoctypeSystemIdentifierState(self): argument
1671 data = self.stream.char()
1675 self.tokenQueue.append(self.currentToken)
1676 self.state = self.dataState
1678 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1680 self.currentToken["correct"] = False
1681 self.tokenQueue.append(self.currentToken)
1682 self.state = self.dataState
1684 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
1686 self.state = self.bogusDoctypeState
1689 def bogusDoctypeState(self): argument
1690 data = self.stream.char()
1692 self.tokenQueue.append(self.currentToken)
1693 self.state = self.dataState
1696 self.stream.unget(data)
1697 self.tokenQueue.append(self.currentToken)
1698 self.state = self.dataState
1703 def cdataSectionState(self): argument
1706 data.append(self.stream.charsUntil("]"))
1707 data.append(self.stream.charsUntil(">"))
1708 char = self.stream.char()
1724 self.tokenQueue.append({"type": tokenTypes["ParseError"],
1728 self.tokenQueue.append({"type": tokenTypes["Characters"],
1730 self.state = self.dataState