1// Copyright (C) 2006 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15 16/** 17 * @fileoverview 18 * some functions for browser-side pretty printing of code contained in html. 19 * 20 * The lexer should work on a number of languages including C and friends, 21 * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles. 22 * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but, 23 * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or 24 * CAML-like languages. 25 * 26 * If there's a language not mentioned here, then I don't know it, and don't 27 * know whether it works. If it has a C-like, Bash-like, or XML-like syntax 28 * then it should work passably. 29 * 30 * Usage: 31 * 1) include this source file in an html page via 32 * <script type="text/javascript" src="/path/to/prettify.js"></script> 33 * 2) define style rules. See the example page for examples. 34 * 3) mark the <pre> and <code> tags in your source with class=prettyprint. 35 * You can also use the (html deprecated) <xmp> tag, but the pretty printer 36 * needs to do more substantial DOM manipulations to support that, so some 37 * css styles may not be preserved. 38 * That's it. I wanted to keep the API as simple as possible, so there's no 39 * need to specify which language the code is in. 
40 * 41 * Change log: 42 * cbeust, 2006/08/22 43 * Java annotations (start with "@") are now captured as literals ("lit") 44 */ 45 46// JSLint declarations 47/*global console, document, navigator, setTimeout, window */ 48 49/** 50 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with 51 * UI events. 52 * If set to {@code false}, {@code prettyPrint()} is synchronous. 53 */ 54var PR_SHOULD_USE_CONTINUATION = true; 55 56/** the number of characters between tab columns */ 57var PR_TAB_WIDTH = 8; 58 59/** Walks the DOM returning a properly escaped version of innerHTML. 60 * @param {Node} node 61 * @param {Array.<string>} out output buffer that receives chunks of HTML. 62 */ 63var PR_normalizedHtml; 64 65/** Contains functions for creating and registering new language handlers. 66 * @type {Object} 67 */ 68var PR; 69 70/** Pretty print a chunk of code. 71 * 72 * @param {string} sourceCodeHtml code as html 73 * @return {string} code as html, but prettier 74 */ 75var prettyPrintOne; 76/** find all the < pre > and < code > tags in the DOM with class=prettyprint 77 * and prettify them. 78 * @param {Function} opt_whenDone if specified, called when the last entry 79 * has been finished. 80 */ 81var prettyPrint; 82 83/** browser detection. @extern */ 84function _pr_isIE6() { 85 var isIE6 = navigator && navigator.userAgent && 86 /\bMSIE 6\./.test(navigator.userAgent); 87 _pr_isIE6 = function () { return isIE6; }; 88 return isIE6; 89} 90 91 92(function () { 93 /** Splits input on space and returns an Object mapping each non-empty part to 94 * true. 95 */ 96 function wordSet(words) { 97 words = words.split(/ /g); 98 var set = {}; 99 for (var i = words.length; --i >= 0;) { 100 var w = words[i]; 101 if (w) { set[w] = null; } 102 } 103 return set; 104 } 105 106 // Keyword lists for various languages. 
// Each list is a string of space separated words that ends with a space, so
// that lists can be concatenated directly.
var FLOW_CONTROL_KEYWORDS = 'break continue do else for if return while ';

var C_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  'auto case char const default ',
  'double enum extern float goto int long register short signed sizeof ',
  'static struct switch typedef union unsigned void volatile '
].join('');

var COMMON_KEYWORDS = [
  C_KEYWORDS,
  'catch class delete false import ',
  'new operator private protected public this throw true try '
].join('');

var CPP_KEYWORDS = [
  COMMON_KEYWORDS,
  'alignof align_union asm axiom bool ',
  'concept concept_map const_cast constexpr decltype ',
  'dynamic_cast explicit export friend inline late_check ',
  'mutable namespace nullptr reinterpret_cast static_assert static_cast ',
  'template typeid typename typeof using virtual wchar_t where '
].join('');

var JAVA_KEYWORDS = [
  COMMON_KEYWORDS,
  'boolean byte extends final finally implements import instanceof null ',
  'native package strictfp super synchronized throws transient '
].join('');

var CSHARP_KEYWORDS = [
  JAVA_KEYWORDS,
  'as base by checked decimal delegate descending event ',
  'fixed foreach from group implicit in interface internal into is lock ',
  'object out override orderby params readonly ref sbyte sealed ',
  'stackalloc string select uint ulong unchecked unsafe ushort var '
].join('');

var JSCRIPT_KEYWORDS = [
  COMMON_KEYWORDS,
  'debugger eval export function get null set undefined var with ',
  'Infinity NaN '
].join('');

var PERL_KEYWORDS = [
  'caller delete die do dump elsif eval exit foreach for ',
  'goto if import last local my next no our print package redo require ',
  'sub undef unless until use wantarray while BEGIN END '
].join('');

var PYTHON_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  'and as assert class def del ',
  'elif except exec finally from global import in is lambda ',
  'nonlocal not or pass print raise try with yield ',
  'False True None '
].join('');

var RUBY_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  'alias and begin case class def defined elsif ',
  'end ensure false in module next nil not or redo rescue ',
  'retry self super then true undef unless until when yield BEGIN END '
].join('');

var SH_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  'case done elif esac eval fi ',
  'function in local set then until '
].join('');

// The union of every list above.
var ALL_KEYWORDS = [
  CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS
].join('');

// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text, i.e. anything not styled more specifically. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';

/**
 * True if ch is an unaccented latin letter (a-z or A-Z).
 * @param {string} ch a single character.
 * @return {boolean}
 */
function isWordChar(ch) {
  var isLower = ch >= 'a' && ch <= 'z';
  var isUpper = ch >= 'A' && ch <= 'Z';
  return isLower || isUpper;
}

/** Splice one array into another. */
/**
 * Splices the elements of one array into another, in place.
 * Like the python <code>
 * container[containerPosition:containerPosition + countReplaced] = inserted
 * </code>
 *
 * The arguments for {@code splice} are assembled in a separate array, so
 * {@code inserted} is never modified, not even temporarily.  It may therefore
 * be frozen or otherwise read-only.
 *
 * @param {Array} inserted the elements to insert; left untouched.
 * @param {Array} container modified in place
 * @param {Number} containerPosition index in container at which the
 *     replacement starts.
 * @param {Number} countReplaced the number of container elements to remove.
 *     A missing (or otherwise falsy) value means 0, i.e. a pure insertion.
 */
function spliceArrayInto(
    inserted, container, containerPosition, countReplaced) {
  var spliceArgs = [containerPosition, countReplaced || 0];
  for (var i = 0, n = inserted.length; i < n; ++i) {
    spliceArgs.push(inserted[i]);
  }
  container.splice.apply(container, spliceArgs);
}

/** A set of tokens that can precede a regular expression literal in
  * javascript.
  * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
  * list, but I've removed ones that might be problematic when seen in
  * languages that don't support regular expression literals.
  *
  * <p>Specifically, I've removed any keywords that can't precede a regexp
  * literal in a syntactically legal javascript program, and I've removed the
  * "in" keyword since it's not a keyword in many languages, and might be used
  * as a count of inches.
  */
/**
 * A pattern that matches text ending (ignoring trailing whitespace) in a
 * token after which a "/" starts a regular expression literal.  It also
 * matches the empty string, i.e. the very start of the input.
 * @private
 */
var REGEXP_PRECEDER_PATTERN = function () {
  var preceders = [
      "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
      "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
      "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
      "<", "<<", "<<=", "<=", "=", "==", "===", ">",
      ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
      "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
      "||=", "~" /* handles =~ and !~ */,
      "break", "case", "continue", "delete",
      "do", "else", "finally", "instanceof",
      "return", "throw", "try", "typeof"
      ];
  // The alternatives, in order; they are joined with '|' below.
  var alternatives = [
      '(?:(?:^|[^0-9.])\\.{1,3})',  // a dot that's not part of a number
      '(?:(?:^|[^\\+])\\+)',        // allow + but not ++
      '(?:(?:^|[^\\-])-)'           // allow - but not --
      ];
  for (var i = 0; i < preceders.length; ++i) {
    var preceder = preceders[i];
    if (/^[a-zA-Z]/.test(preceder)) {
      // a keyword: must start at a word boundary
      alternatives.push('\\b' + preceder);
    } else {
      // punctuation: backslash-escape everything except = < > : &
      alternatives.push(preceder.replace(/([^=<>:&])/g, '\\$1'));
    }
  }
  alternatives.push('^');  // matches the empty string
  // matches at end
  return new RegExp('(?:' + alternatives.join('|') + ')\\s*$');
  // CAVEAT: this does not properly handle the case where a regular
  // expression immediately follows another since a regular expression may
  // have flags for case-sensitivity and the like.  Having regexp tokens
  // adjacent is not
  // valid in any language I'm aware of, so I'm punting.
  // TODO: maybe style special characters inside a regexp as punctuation.
}();

// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;

/** like textToHtml but escapes double quotes to be attribute safe. */
/**
 * Escapes text for use inside a double quoted html attribute value: like
 * textToHtml, but double quotes are escaped as well.
 * @param {string} str plain text.
 * @return {string} html.
 */
function attribToHtml(str) {
  // '&' must go first so that the entities produced below are not re-escaped.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/**
 * Escapes the html special characters &, < and > in plain text.
 * @param {string} str plain text.
 * @return {string} html.
 */
function textToHtml(str) {
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}


var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;

/**
 * Unescapes html to plain text.  Decodes decimal (&#65;) and hexadecimal
 * (&#x41;) character references, and the named entities lt, gt, apos, quot,
 * amp and nbsp.  nbsp becomes an ordinary space.
 * @param {string} html
 * @return {string} plain text.
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }
  // Handle numeric entities specially.  We can't use functional substitution
  // since that doesn't work in older versions of Safari.
  // These should be rare since most browsers convert them to normal chars.
  for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
    var end = html.indexOf(';', pos);
    if (end >= 0) {
      // the digits start right after the two characters "&#"
      var num = html.substring(pos + 2, end);
      var radix = 10;
      var firstChar = num.charAt(0);
      if (firstChar === 'x' || firstChar === 'X') {
        num = num.substring(1);
        radix = 16;
      }
      // Only decode when everything up to the ';' is digits.  parseInt alone
      // would accept a numeric prefix followed by arbitrary text.
      var isNumber =
          (radix === 16 ? /^[0-9a-f]+$/i : /^[0-9]+$/).test(num);
      if (isNumber) {
        var codePoint = parseInt(num, radix);
        html = (html.substring(0, pos) + String.fromCharCode(codePoint) +
                html.substring(end + 1));
      }
    }
  }

  // '&amp;' is decoded last so that e.g. "&amp;lt;" yields "&lt;", not "<".
  return html.replace(pr_ltEnt, '<')
      .replace(pr_gtEnt, '>')
      .replace(pr_aposEnt, "'")
      .replace(pr_quotEnt, '"')
      .replace(pr_ampEnt, '&')
      .replace(pr_nbspEnt, ' ');
}

/** is the given node's innerHTML normally unescaped? */
/**
 * True for nodes whose innerHTML is reported without entity escaping, which
 * here means XMP elements.
 * @param {Node} node
 * @return {boolean}
 */
function isRawContent(node) {
  return node.tagName === 'XMP';
}

/**
 * Appends a properly escaped html serialization of node to out.
 * Elements are written with a lower case tag name, their specified
 * attributes, their children, and an end tag.  The end tag is left off only
 * for childless br, link and img elements.  Text and CDATA nodes are written
 * escaped.  Any other kind of node produces no output.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
function normalizedHtml(node, out) {
  var type = node.nodeType;
  if (type === 1) {  // an element
    var name = node.tagName.toLowerCase();
    out.push('<', name);
    var attrs = node.attributes;
    for (var i = 0; i < attrs.length; ++i) {
      var attr = attrs[i];
      if (attr.specified) {
        out.push(' ');
        normalizedHtml(attr, out);
      }
    }
    out.push('>');
    var child = node.firstChild;
    while (child) {
      normalizedHtml(child, out);
      child = child.nextSibling;
    }
    var isEmptyVoidTag = !node.firstChild && /^(?:br|link|img)$/.test(name);
    if (!isEmptyVoidTag) {
      out.push('<\/', name, '>');
    }
  } else if (type === 2) {  // an attribute
    out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
  } else if (type === 3 || type === 4) {  // text
    out.push(textToHtml(node.nodeValue));
  }
}

/** Whether innerHTML can be trusted; null until getInnerHtml has probed. */
var PR_innerHtmlWorks = null;

/**
 * Returns the content of node as html.
 * @param {Node} node
 * @return {string} html, with XMP content escaped.
 */
function getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (PR_innerHtmlWorks === null) {
    var probe = document.createElement('PRE');
    probe.appendChild(
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    // a working innerHTML escapes every '<' of the text node
    PR_innerHtmlWorks = !/</.test(probe.innerHTML);
  }

  if (!PR_innerHtmlWorks) {
    // emulate innerHTML by serializing the children ourselves
    var out = [];
    for (var child = node.firstChild; child; child = child.nextSibling) {
      normalizedHtml(child, out);
    }
    return out.join('');
  }

  var content = node.innerHTML;
  // XMP tags contain unescaped entities so require special handling.
  return isRawContent(node) ? textToHtml(content) : content;
}

/** returns a function that expand tabs to spaces. */
/**
 * Returns a function that expands tabs to spaces.  The returned function can
 * be fed successive chunks of text: it remembers the column reached at the
 * end of one chunk when expanding the tabs of the next.
 * @param {number} tabWidth the number of characters between tab columns.
 * @return {function (string) : string} a function that takes
 *     plain text and return the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  // characters seen since the last newline, across all chunks
  var column = 0;

  return function (plainText) {
    // Only allocated once a tab is seen, so that tab-free text is returned
    // as is.
    var pieces = null;
    // index past the last character of plainText already copied to pieces
    var copiedUpTo = 0;
    for (var i = 0, n = plainText.length; i < n; ++i) {
      var ch = plainText.charAt(i);
      if (ch === '\t') {
        if (!pieces) { pieces = []; }
        pieces.push(plainText.substring(copiedUpTo, i));
        // the number of spaces needed to reach the next multiple of tabWidth
        var padding = tabWidth - (column % tabWidth);
        column += padding;
        for (; padding > 0; --padding) {
          pieces.push(' ');
        }
        copiedUpTo = i + 1;
      } else if (ch === '\n') {
        column = 0;
      } else {
        ++column;
      }
    }
    if (!pieces) { return plainText; }
    pieces.push(plainText.substring(copiedUpTo));
    return pieces.join('');
  };
}

// The below pattern matches one of the following
// (1) /[^<]+/ : A run of characters other than '<'
// (2) /<!--.*?-->/: an HTML comment
// (3) /<!\[CDATA\[.*?\]\]>/: a cdata section
// (4) /<\/?[a-zA-Z][^>]*>/ : A probable tag that should not be highlighted
// (5) /</ : A '<' that does not begin a larger chunk.
// pr_chunkPattern splits markup into chunks: a run of characters other than
// '<', an html comment, a CDATA section, something that looks like a tag, or
// a lone '<' that starts none of those and is treated like a run of text.
var pr_chunkPattern =
  /(?:[^<]+|<!--[\s\S]*?-->|<!\[CDATA\[([\s\S]*?)\]\]>|<\/?[a-zA-Z][^>]*>|<)/g;
var pr_commentPrefix = /^<!--/;
// The '!' matters: without it no CDATA chunk is ever recognized, and its
// content is dropped from the source text as if it were a tag.
var pr_cdataPrefix = /^<!\[CDATA\[/;
var pr_brPrefix = /^<br\b/i;
var pr_tagNameRe = /^<(\/?)([a-zA-Z]+)/;

/** split markup into chunks of html tags (style null) and
  * plain text (style {@link #PR_PLAIN}), converting tags which are
  * significant for tokenization (<br>) into their textual equivalent.
  *
  * Comments are dropped.  The content of a CDATA section is added to the
  * source as is.  Other tags are removed from the source and recorded in the
  * tags list, as is a whole nocode section.
  *
  * @param {string} s html where whitespace is considered significant.
  * @return {Object} source code and extracted tags: {@code source} is the
  *     plain text, and {@code tags} is a flat list
  *     [position_0, html_0, position_1, html_1, ...] where each position is
  *     an index into {@code source}.
  * @private
  */
function extractTags(s) {
  // Since the pattern has the 'g' modifier, match() returns the list of all
  // chunks as plain strings, which we then classify.
  var matches = s.match(pr_chunkPattern);
  var sourceBuf = [];
  var sourceBufLen = 0;
  var extractedTags = [];
  if (matches) {
    for (var i = 0, n = matches.length; i < n; ++i) {
      var match = matches[i];
      if (match.length > 1 && match.charAt(0) === '<') {
        if (pr_commentPrefix.test(match)) { continue; }
        if (pr_cdataPrefix.test(match)) {
          // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
          // '<![CDATA[' is 9 characters long and ']]>' is 3.
          sourceBuf.push(match.substring(9, match.length - 3));
          sourceBufLen += match.length - 12;
        } else if (pr_brPrefix.test(match)) {
          // <br> tags are lexically significant so convert them to text.
          // This is undone later.
          sourceBuf.push('\n');
          ++sourceBufLen;
        } else if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
          // A <span class="nocode"> will start a section that should be
          // ignored.  Continue walking the list until we see a matching end
          // tag, counting nested tags of the same name.
          var name = match.match(pr_tagNameRe)[2];
          var depth = 1;
          var j;
          for (j = i + 1; j < n; ++j) {
            var name2 = matches[j].match(pr_tagNameRe);
            if (name2 && name2[2] === name) {
              if (name2[1] === '/') {
                if (--depth === 0) { break; }
              } else {
                ++depth;
              }
            }
          }
          if (j < n) {
            // record the whole section, end tag included, as one chunk
            extractedTags.push(
                sourceBufLen, matches.slice(i, j + 1).join(''));
            i = j;
          } else {  // Ignore unclosed sections.
            extractedTags.push(sourceBufLen, match);
          }
        } else {
          extractedTags.push(sourceBufLen, match);
        }
      } else {
        var literalText = htmlToText(match);
        sourceBuf.push(literalText);
        sourceBufLen += literalText.length;
      }
    }
  }
  return { source: sourceBuf.join(''), tags: extractedTags };
}

/**
 * True if the given tag contains a class attribute with the nocode class.
 * @param {string} tag the html of a single tag.
 * @return {boolean}
 */
function isNoCodeTag(tag) {
  return !!tag
      // First canonicalize the representation of attributes
      .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
               ' $1="$2$3$4"')
      // Then look for the attribute we want.
      .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
}

/** Given triples of [style, pattern, context] returns a lexing function,
  * The lexing function interprets the patterns to find token boundaries and
  * returns a decoration list of the form
  * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
  * where index_n is an index into the sourceCode, and style_n is a style
  * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
  * all characters in sourceCode[index_n-1:index_n].
  *
  * The stylePatterns is a list whose elements have the form
  * [style : string, pattern : RegExp, context : RegExp, shortcut : string].
  *
  * Style is a style constant like PR_PLAIN.
  */
/**
 * Builds a lexing function from lists of style patterns.  Each style pattern
 * has the form
 * [style : string, pattern : RegExp, context : RegExp, shortcut : string].
 *
 * Pattern must only match prefixes, and if it matches a prefix and context
 * is null or matches the last non-comment token parsed, then that match is
 * considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 * Should a shortcut pattern nevertheless fail to match, or match nothing,
 * the fallthrough patterns are tried as if there were no shortcut.
 *
 * Text that no pattern matches is emitted one character at a time with style
 * PR_PLAIN, so the lexer always makes progress.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (string, number?) : Array.<number|string>} a
 *   function that takes source code and an optional base position that is
 *   added to every index, and returns a list of decorations
 *   [index_0, style_0, index_1, style_1, ...].
 */
function createSimpleLexer(shortcutStylePatterns,
                           fallthroughStylePatterns) {
  // maps a first character to the one pattern to use for it
  var shortcuts = {};
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    // Walk backwards so that, when two patterns claim the same character,
    // the earlier pattern wins.
    for (var i = allPatterns.length; --i >= 0;) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
    }
  })();

  var nPatterns = fallthroughStylePatterns.length;
  var notWs = /\S/;

  return function (sourceCode, opt_basePos) {
    opt_basePos = opt_basePos || 0;
    var decorations = [opt_basePos, PR_PLAIN];
    var lastToken = '';
    var pos = 0;  // index into sourceCode
    var tail = sourceCode;

    while (tail.length) {
      var style;
      var token = null;
      var match;

      var patternParts = shortcuts[tail.charAt(0)];
      if (patternParts) {
        match = tail.match(patternParts[1]);
        // Guard against a shortcut whose pattern does not actually match.
        if (match && match[0]) {
          token = match[0];
          style = patternParts[0];
        }
      }

      if (!token) {
        for (var i = 0; i < nPatterns; ++i) {
          patternParts = fallthroughStylePatterns[i];
          var contextPattern = patternParts[2];
          if (contextPattern && !contextPattern.test(lastToken)) {
            // rule can't be used
            continue;
          }
          match = tail.match(patternParts[1]);
          if (match) {
            token = match[0];
            style = patternParts[0];
            break;
          }
        }
      }

      if (!token) {  // make sure that we make progress
        style = PR_PLAIN;
        token = tail.substring(0, 1);
      }

      decorations.push(opt_basePos + pos, style);
      pos += token.length;
      tail = tail.substring(token.length);
      if (style !== PR_COMMENT && notWs.test(token)) { lastToken = token; }
    }
    return decorations;
  };
}

var PR_MARKUP_LEXER = createSimpleLexer([], [
    [PR_PLAIN,       /^[^<]+/, null],
    [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null],
    [PR_COMMENT,     /^<!--[\s\S]*?(?:-->|$)/, null],
    [PR_SOURCE,      /^<\?[\s\S]*?(?:\?>|$)/, null],
    [PR_SOURCE,      /^<%[\s\S]*?(?:%>|$)/, null],
    [PR_SOURCE,
     // Tags whose content is not escaped, and which contain source code.
     /^<(script|style|xmp)\b[^>]*>[\s\S]*?<\/\1\b[^>]*>/i, null],
    [PR_TAG,         /^<\/?\w[^<>]*>/, null]
    ]);
// Splits any of the script|style|xmp entries above into a start tag,
// source content, and end tag.
var PR_SOURCE_CHUNK_PARTS = /^(<[^>]*>)([\s\S]*)(<\/[^>]*>)$/;

/** split markup on tags, comments, application directives, and other top
  * level constructs.  Tags are returned as a single token - attributes are
  * not yet broken out.
  * @param {string} source markup as plain text.
  * @return {Array.<number|string>} a decoration list.
  * @private
  */
function tokenizeMarkup(source) {
  var decorations = PR_MARKUP_LEXER(source);
  for (var i = 0; i < decorations.length; i += 2) {
    if (decorations[i + 1] === PR_SOURCE) {
      var start, end;
      start = decorations[i];
      // a chunk ends where the next decoration starts
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      // Split out start and end script tags as actual tags, and leave the
      // body with style SCRIPT.
      var sourceChunk = source.substring(start, end);
      var match = sourceChunk.match(PR_SOURCE_CHUNK_PARTS);
      if (match) {
        decorations.splice(
            i, 2,
            start, PR_TAG,  // the open chunk
            start + match[1].length, PR_SOURCE,
            start + match[1].length + (match[2] || '').length, PR_TAG);
      }
    }
  }
  return decorations;
}

var PR_TAG_LEXER = createSimpleLexer([
    [PR_ATTRIB_VALUE, /^\'[^\']*(?:\'|$)/, null, "'"],
    [PR_ATTRIB_VALUE, /^\"[^\"]*(?:\"|$)/, null, '"'],
    [PR_PUNCTUATION,  /^[<>\/=]+/, null, '<>/=']
    ], [
    [PR_TAG,          /^[\w:\-]+/, /^</],
    [PR_ATTRIB_VALUE, /^[\w\-]+/, /^=/],
    [PR_ATTRIB_NAME,  /^[\w:\-]+/, null],
    [PR_PLAIN,        /^\s+/, null, ' \t\r\n']
    ]);

/** split tags attributes and their values out from the tag name, and
  * recursively lex source chunks.
  * @param {string} source markup as plain text.
  * @param {Array.<number|string>} decorations modified in place.
  * @return {Array.<number|string>} decorations.
  * @private
  */
function splitTagAttributes(source, decorations) {
  for (var i = 0; i < decorations.length; i += 2) {
    var style = decorations[i + 1];
    if (style === PR_TAG) {
      var start, end;
      start = decorations[i];
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      var chunk = source.substring(start, end);
      var subDecorations = PR_TAG_LEXER(chunk, start);
      spliceArrayInto(subDecorations, decorations, i, 2);
      // skip over the decorations just spliced in
      i += subDecorations.length - 2;
    }
  }
  return decorations;
}

/** returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  */
/**
 * Returns a function that produces a list of decorations from source text.
 * Strings, comments and (optionally) regular expression literals are found
 * first; the remaining text is then split into whitespace, identifiers,
 * numbers and punctuation, and identifiers are classified as keywords,
 * types or annotations.
 *
 * @param {Object} options a set of optional parameters.  The ones read are:
 *   tripleQuotedStrings: recognize '''...''' and """...""" strings, as well
 *       as single and double quoted strings, all of which may span lines.
 *   multiLineStrings: otherwise, recognize ', " and ` strings that may span
 *       lines.  Without either option strings are ' or " quoted and end at
 *       the end of the line.
 *   hashComments: recognize comments running from # to the end of the line.
 *   cStyleComments: recognize // line comments and slash-star comments.
 *   regexLiterals: recognize /.../ where REGEXP_PRECEDER_PATTERN matches
 *       the preceding token.
 *   keywords: a space separated list of words to style as keywords.
 * @return {function (string) : Array.<string|number>} a
 *     decorator that takes sourceCode as plain text and that returns a
 *     decoration list
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options.tripleQuotedStrings) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options.multiLineStrings) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  // everything up to the next character that could start a string, a
  // comment or a regular expression literal
  fallthroughStylePatterns.push(
      [PR_PLAIN,   /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
  if (options.hashComments) {
    shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
  }
  if (options.cStyleComments) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options.regexLiterals) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '^/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '(?:/|$)');
    fallthroughStylePatterns.push(
        [PR_STRING, new RegExp(REGEX_LITERAL), REGEXP_PRECEDER_PATTERN]);
  }

  var keywords = wordSet(options.keywords);
  // Keyword lookups must ignore inherited properties such as "constructor"
  // and "toString", which are ordinary identifiers in source code.
  var hasOwn = Object.prototype.hasOwnProperty;

  options = null;

  /** splits the given string into comment, string, and "other" tokens.
    * @param {string} sourceCode as plain text
    * @return {Array.<number|string>} a decoration list.
    * @private
    */
  var splitStringAndCommentTokens = createSimpleLexer(
      shortcutStylePatterns, fallthroughStylePatterns);

  var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [
      [PR_PLAIN,       /^\s+/, null, ' \r\n'],
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_PLAIN,       /^[a-z_$@][a-z_$@0-9]*/i, null],
      // A hex number, optionally followed by a letter suffix
      [PR_LITERAL,     /^0x[a-f0-9]+[a-z]*/i, null],
      // An octal or decimal number, possibly in scientific notation
      [PR_LITERAL,
       /^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
       null, '123456789'],
      [PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
      // Fallback will handle decimal points not adjacent to a digit
      ]);

  /** splits plain text tokens into more specific tokens, and then tries to
    * recognize keywords, and types.
    * @param {string} source the whole source as plain text.
    * @param {Array.<number|string>} decorations modified in place.
    * @return {Array.<number|string>} decorations.
    * @private
    */
  function splitNonStringNonCommentTokens(source, decorations) {
    for (var i = 0; i < decorations.length; i += 2) {
      var style = decorations[i + 1];
      if (style === PR_PLAIN) {
        var start, end, chunk, subDecs;
        start = decorations[i];
        end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
        chunk = source.substring(start, end);
        // the indices in subDecs are offset by start, so they index source
        subDecs = styleLiteralIdentifierPuncRecognizer(chunk, start);
        for (var j = 0, m = subDecs.length; j < m; j += 2) {
          var subStyle = subDecs[j + 1];
          if (subStyle === PR_PLAIN) {
            var subStart = subDecs[j];
            // The last token runs to the end of the chunk, which is at
            // index end of source (not at chunk.length).
            var subEnd = j + 2 < m ? subDecs[j + 2] : end;
            var token = source.substring(subStart, subEnd);
            if (token === '.') {
              subDecs[j + 1] = PR_PUNCTUATION;
            } else if (hasOwn.call(keywords, token)) {
              subDecs[j + 1] = PR_KEYWORD;
            } else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
              // classify types and annotations using Java's style conventions
              subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
            }
          }
        }
        spliceArrayInto(subDecs, decorations, i, 2);
        // skip over the decorations just spliced in
        i += subDecs.length - 2;
      }
    }
    return decorations;
  }

  return function (sourceCode) {
    // Split into strings, comments, and other.
    // We do this because strings and comments are easily recognizable and can
    // contain stuff that looks like other tokens, so we want to mark those
    // early so we don't recurse into them.
    var decorations = splitStringAndCommentTokens(sourceCode);

    // Split non comment|string tokens on whitespace and word boundaries
    decorations = splitNonStringNonCommentTokens(sourceCode, decorations);

    return decorations;
  };
}

/** The decorator used for source code of any language. */
var decorateSource = sourceDecorator({
      keywords: ALL_KEYWORDS,
      hashComments: true,
      cStyleComments: true,
      multiLineStrings: true,
      regexLiterals: true
    });

/** identify regions of markup that are really source code, and recursively
  * lex them.
  */
831 * @private 832 */ 833 function splitSourceNodes(source, decorations) { 834 for (var i = 0; i < decorations.length; i += 2) { 835 var style = decorations[i + 1]; 836 if (style === PR_SOURCE) { 837 // Recurse using the non-markup lexer 838 var start, end; 839 start = decorations[i]; 840 end = i + 2 < decorations.length ? decorations[i + 2] : source.length; 841 var subDecorations = decorateSource(source.substring(start, end)); 842 for (var j = 0, m = subDecorations.length; j < m; j += 2) { 843 subDecorations[j] += start; 844 } 845 spliceArrayInto(subDecorations, decorations, i, 2); 846 i += subDecorations.length - 2; 847 } 848 } 849 return decorations; 850 } 851 852 /** identify attribute values that really contain source code and recursively 853 * lex them. 854 * @private 855 */ 856 function splitSourceAttributes(source, decorations) { 857 var nextValueIsSource = false; 858 for (var i = 0; i < decorations.length; i += 2) { 859 var style = decorations[i + 1]; 860 var start, end; 861 if (style === PR_ATTRIB_NAME) { 862 start = decorations[i]; 863 end = i + 2 < decorations.length ? decorations[i + 2] : source.length; 864 nextValueIsSource = /^on|^style$/i.test(source.substring(start, end)); 865 } else if (style === PR_ATTRIB_VALUE) { 866 if (nextValueIsSource) { 867 start = decorations[i]; 868 end = i + 2 < decorations.length ? 
decorations[i + 2] : source.length; 869 var attribValue = source.substring(start, end); 870 var attribLen = attribValue.length; 871 var quoted = 872 (attribLen >= 2 && /^[\"\']/.test(attribValue) && 873 attribValue.charAt(0) === attribValue.charAt(attribLen - 1)); 874 875 var attribSource; 876 var attribSourceStart; 877 var attribSourceEnd; 878 if (quoted) { 879 attribSourceStart = start + 1; 880 attribSourceEnd = end - 1; 881 attribSource = attribValue; 882 } else { 883 attribSourceStart = start + 1; 884 attribSourceEnd = end - 1; 885 attribSource = attribValue.substring(1, attribValue.length - 1); 886 } 887 888 var attribSourceDecorations = decorateSource(attribSource); 889 for (var j = 0, m = attribSourceDecorations.length; j < m; j += 2) { 890 attribSourceDecorations[j] += attribSourceStart; 891 } 892 893 if (quoted) { 894 attribSourceDecorations.push(attribSourceEnd, PR_ATTRIB_VALUE); 895 spliceArrayInto(attribSourceDecorations, decorations, i + 2, 0); 896 } else { 897 spliceArrayInto(attribSourceDecorations, decorations, i, 2); 898 } 899 } 900 nextValueIsSource = false; 901 } 902 } 903 return decorations; 904 } 905 906 /** returns a decoration list given a string of markup. 907 * 908 * This code recognizes a number of constructs. 909 * <!-- ... --> comment 910 * <!\w ... > declaration 911 * <\w ... > tag 912 * </\w ... > tag 913 * <?...?> embedded source 914 * <%...%> embedded source 915 * &[#\w]...; entity 916 * 917 * It does not recognizes %foo; doctype entities from . 918 * 919 * It will recurse into any <style>, <script>, and on* attributes using 920 * PR_lexSource. 921 */ 922 function decorateMarkup(sourceCode) { 923 // This function works as follows: 924 // 1) Start by splitting the markup into text and tag chunks 925 // Input: string s 926 // Output: List<PR_Token> where style in (PR_PLAIN, null) 927 // 2) Then split the text chunks further into comments, declarations, 928 // tags, etc. 
929 // After each split, consider whether the token is the start of an 930 // embedded source section, i.e. is an open <script> tag. If it is, find 931 // the corresponding close token, and don't bother to lex in between. 932 // Input: List<string> 933 // Output: List<PR_Token> with style in 934 // (PR_TAG, PR_PLAIN, PR_SOURCE, null) 935 // 3) Finally go over each tag token and split out attribute names and 936 // values. 937 // Input: List<PR_Token> 938 // Output: List<PR_Token> where style in 939 // (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null) 940 var decorations = tokenizeMarkup(sourceCode); 941 decorations = splitTagAttributes(sourceCode, decorations); 942 decorations = splitSourceNodes(sourceCode, decorations); 943 decorations = splitSourceAttributes(sourceCode, decorations); 944 return decorations; 945 } 946 947 /** 948 * @param {string} sourceText plain text 949 * @param {Array.<number|string>} extractedTags chunks of raw html preceded 950 * by their position in sourceText in order. 951 * @param {Array.<number|string>} decorations style classes preceded by their 952 * position in sourceText in order. 953 * @return {string} html 954 * @private 955 */ 956 function recombineTagsAndDecorations(sourceText, extractedTags, decorations) { 957 var html = []; 958 // index past the last char in sourceText written to html 959 var outputIdx = 0; 960 961 var openDecoration = null; 962 var currentDecoration = null; 963 var tagPos = 0; // index into extractedTags 964 var decPos = 0; // index into decorations 965 var tabExpander = makeTabExpander(PR_TAB_WIDTH); 966 967 var adjacentSpaceRe = /([\r\n ]) /g; 968 var startOrSpaceRe = /(^| ) /gm; 969 var newlineRe = /\r\n?|\n/g; 970 var trailingSpaceRe = /[ \r\n]$/; 971 var lastWasSpace = true; // the last text chunk emitted ended with a space. 
972 973 // A helper function that is responsible for opening sections of decoration 974 // and outputing properly escaped chunks of source 975 function emitTextUpTo(sourceIdx) { 976 if (sourceIdx > outputIdx) { 977 if (openDecoration && openDecoration !== currentDecoration) { 978 // Close the current decoration 979 html.push('</span>'); 980 openDecoration = null; 981 } 982 if (!openDecoration && currentDecoration) { 983 openDecoration = currentDecoration; 984 html.push('<span class="', openDecoration, '">'); 985 } 986 // This interacts badly with some wikis which introduces paragraph tags 987 // into pre blocks for some strange reason. 988 // It's necessary for IE though which seems to lose the preformattedness 989 // of <pre> tags when their innerHTML is assigned. 990 // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html 991 // and it serves to undo the conversion of <br>s to newlines done in 992 // chunkify. 993 var htmlChunk = textToHtml( 994 tabExpander(sourceText.substring(outputIdx, sourceIdx))) 995 .replace(lastWasSpace 996 ? startOrSpaceRe 997 : adjacentSpaceRe, '$1 '); 998 // Keep track of whether we need to escape space at the beginning of the 999 // next chunk. 1000 lastWasSpace = trailingSpaceRe.test(htmlChunk); 1001 html.push(htmlChunk.replace(newlineRe, '<br />')); 1002 outputIdx = sourceIdx; 1003 } 1004 } 1005 1006 while (true) { 1007 // Determine if we're going to consume a tag this time around. Otherwise 1008 // we consume a decoration or exit. 1009 var outputTag; 1010 if (tagPos < extractedTags.length) { 1011 if (decPos < decorations.length) { 1012 // Pick one giving preference to extractedTags since we shouldn't open 1013 // a new style that we're going to have to immediately close in order 1014 // to output a tag. 1015 outputTag = extractedTags[tagPos] <= decorations[decPos]; 1016 } else { 1017 outputTag = true; 1018 } 1019 } else { 1020 outputTag = false; 1021 } 1022 // Consume either a decoration or a tag or exit. 
1023 if (outputTag) { 1024 emitTextUpTo(extractedTags[tagPos]); 1025 if (openDecoration) { 1026 // Close the current decoration 1027 html.push('</span>'); 1028 openDecoration = null; 1029 } 1030 html.push(extractedTags[tagPos + 1]); 1031 tagPos += 2; 1032 } else if (decPos < decorations.length) { 1033 emitTextUpTo(decorations[decPos]); 1034 currentDecoration = decorations[decPos + 1]; 1035 decPos += 2; 1036 } else { 1037 break; 1038 } 1039 } 1040 emitTextUpTo(sourceText.length); 1041 if (openDecoration) { 1042 html.push('</span>'); 1043 } 1044 1045 return html.join(''); 1046 } 1047 1048 /** Maps language-specific file extensions to handlers. */ 1049 var langHandlerRegistry = {}; 1050 /** Register a language handler for the given file extensions. 1051 * @param {function (string) : Array.<number|string>} handler 1052 * a function from source code to a list of decorations. 1053 * @param {Array.<string>} fileExtensions 1054 */ 1055 function registerLangHandler(handler, fileExtensions) { 1056 for (var i = fileExtensions.length; --i >= 0;) { 1057 var ext = fileExtensions[i]; 1058 if (!langHandlerRegistry.hasOwnProperty(ext)) { 1059 langHandlerRegistry[ext] = handler; 1060 } else if ('console' in window) { 1061 console.log('cannot override language handler %s', ext); 1062 } 1063 } 1064 } 1065 registerLangHandler(decorateSource, ['default-code']); 1066 registerLangHandler(decorateMarkup, 1067 ['default-markup', 'html', 'htm', 'xhtml', 'xml', 'xsl']); 1068 registerLangHandler(sourceDecorator({ 1069 keywords: CPP_KEYWORDS, 1070 hashComments: true, 1071 cStyleComments: true 1072 }), ['c', 'cc', 'cpp', 'cxx', 'cyc']); 1073 registerLangHandler(sourceDecorator({ 1074 keywords: CSHARP_KEYWORDS, 1075 hashComments: true, 1076 cStyleComments: true 1077 }), ['cs']); 1078 registerLangHandler(sourceDecorator({ 1079 keywords: JAVA_KEYWORDS, 1080 cStyleComments: true 1081 }), ['java']); 1082 registerLangHandler(sourceDecorator({ 1083 keywords: SH_KEYWORDS, 1084 hashComments: true, 1085 
multiLineStrings: true 1086 }), ['bsh', 'csh', 'sh']); 1087 registerLangHandler(sourceDecorator({ 1088 keywords: PYTHON_KEYWORDS, 1089 hashComments: true, 1090 multiLineStrings: true, 1091 tripleQuotedStrings: true 1092 }), ['cv', 'py']); 1093 registerLangHandler(sourceDecorator({ 1094 keywords: PERL_KEYWORDS, 1095 hashComments: true, 1096 multiLineStrings: true, 1097 regexLiterals: true 1098 }), ['perl', 'pl', 'pm']); 1099 registerLangHandler(sourceDecorator({ 1100 keywords: RUBY_KEYWORDS, 1101 hashComments: true, 1102 multiLineStrings: true, 1103 regexLiterals: true 1104 }), ['rb']); 1105 registerLangHandler(sourceDecorator({ 1106 keywords: JSCRIPT_KEYWORDS, 1107 cStyleComments: true, 1108 regexLiterals: true 1109 }), ['js']); 1110 1111 function prettyPrintOne(sourceCodeHtml, opt_langExtension) { 1112 try { 1113 // Extract tags, and convert the source code to plain text. 1114 var sourceAndExtractedTags = extractTags(sourceCodeHtml); 1115 /** Plain text. @type {string} */ 1116 var source = sourceAndExtractedTags.source; 1117 1118 /** Even entries are positions in source in ascending order. Odd entries 1119 * are tags that were extracted at that position. 1120 * @type {Array.<number|string>} 1121 */ 1122 var extractedTags = sourceAndExtractedTags.tags; 1123 1124 // Pick a lexer and apply it. 1125 if (!langHandlerRegistry.hasOwnProperty(opt_langExtension)) { 1126 // Treat it as markup if the first non whitespace character is a < and 1127 // the last non-whitespace character is a >. 1128 opt_langExtension = 1129 /^\s*</.test(source) ? 'default-markup' : 'default-code'; 1130 } 1131 1132 /** Even entries are positions in source in ascending order. Odd enties 1133 * are style markers (e.g., PR_COMMENT) that run from that position until 1134 * the end. 
1135 * @type {Array.<number|string>} 1136 */ 1137 var decorations = langHandlerRegistry[opt_langExtension].call({}, source); 1138 1139 // Integrate the decorations and tags back into the source code to produce 1140 // a decorated html string. 1141 return recombineTagsAndDecorations(source, extractedTags, decorations); 1142 } catch (e) { 1143 if ('console' in window) { 1144 console.log(e); 1145 console.trace(); 1146 } 1147 return sourceCodeHtml; 1148 } 1149 } 1150 1151 function prettyPrint(opt_whenDone) { 1152 var isIE6 = _pr_isIE6(); 1153 1154 // fetch a list of nodes to rewrite 1155 var codeSegments = [ 1156 document.getElementsByTagName('pre'), 1157 document.getElementsByTagName('code'), 1158 document.getElementsByTagName('xmp') ]; 1159 var elements = []; 1160 for (var i = 0; i < codeSegments.length; ++i) { 1161 for (var j = 0; j < codeSegments[i].length; ++j) { 1162 elements.push(codeSegments[i][j]); 1163 } 1164 } 1165 codeSegments = null; 1166 1167 // the loop is broken into a series of continuations to make sure that we 1168 // don't make the browser unresponsive when rewriting a large page. 1169 var k = 0; 1170 1171 function doWork() { 1172 var endTime = (PR_SHOULD_USE_CONTINUATION ? 1173 new Date().getTime() + 250 /* ms */ : 1174 Infinity); 1175 for (; k < elements.length && new Date().getTime() < endTime; k++) { 1176 var cs = elements[k]; 1177 if (cs.className && cs.className.indexOf('prettyprint') >= 0) { 1178 // If the classes includes a language extensions, use it. 1179 // Language extensions can be specified like 1180 // <pre class="prettyprint lang-cpp"> 1181 // the language extension "cpp" is used to find a language handler as 1182 // passed to PR_registerLangHandler. 
1183 var langExtension = cs.className.match(/\blang-(\w+)\b/); 1184 if (langExtension) { langExtension = langExtension[1]; } 1185 1186 // make sure this is not nested in an already prettified element 1187 var nested = false; 1188 for (var p = cs.parentNode; p; p = p.parentNode) { 1189 if ((p.tagName === 'pre' || p.tagName === 'code' || 1190 p.tagName === 'xmp') && 1191 p.className && p.className.indexOf('prettyprint') >= 0) { 1192 nested = true; 1193 break; 1194 } 1195 } 1196 if (!nested) { 1197 // fetch the content as a snippet of properly escaped HTML. 1198 // Firefox adds newlines at the end. 1199 var content = getInnerHtml(cs); 1200 content = content.replace(/(?:\r\n?|\n)$/, ''); 1201 1202 // do the pretty printing 1203 var newContent = prettyPrintOne(content, langExtension); 1204 1205 // push the prettified html back into the tag. 1206 if (!isRawContent(cs)) { 1207 // just replace the old html with the new 1208 cs.innerHTML = newContent; 1209 } else { 1210 // we need to change the tag to a <pre> since <xmp>s do not allow 1211 // embedded tags such as the span tags used to attach styles to 1212 // sections of source code. 1213 var pre = document.createElement('PRE'); 1214 for (var i = 0; i < cs.attributes.length; ++i) { 1215 var a = cs.attributes[i]; 1216 if (a.specified) { 1217 var aname = a.name.toLowerCase(); 1218 if (aname === 'class') { 1219 pre.className = a.value; // For IE 6 1220 } else { 1221 pre.setAttribute(a.name, a.value); 1222 } 1223 } 1224 } 1225 pre.innerHTML = newContent; 1226 1227 // remove the old 1228 cs.parentNode.replaceChild(pre, cs); 1229 cs = pre; 1230 } 1231 1232 // Replace <br>s with line-feeds so that copying and pasting works 1233 // on IE 6. 1234 // Doing this on other browsers breaks lots of stuff since \r\n is 1235 // treated as two newlines on Firefox, and doing this also slows 1236 // down rendering. 
1237 if (isIE6 && cs.tagName === 'PRE') { 1238 var lineBreaks = cs.getElementsByTagName('br'); 1239 for (var j = lineBreaks.length; --j >= 0;) { 1240 var lineBreak = lineBreaks[j]; 1241 lineBreak.parentNode.replaceChild( 1242 document.createTextNode('\r\n'), lineBreak); 1243 } 1244 } 1245 } 1246 } 1247 } 1248 if (k < elements.length) { 1249 // finish up in a continuation 1250 setTimeout(doWork, 250); 1251 } else if (opt_whenDone) { 1252 opt_whenDone(); 1253 } 1254 } 1255 1256 doWork(); 1257 } 1258 1259 window['PR_normalizedHtml'] = normalizedHtml; 1260 window['prettyPrintOne'] = prettyPrintOne; 1261 window['prettyPrint'] = prettyPrint; 1262 window['PR'] = { 1263 'createSimpleLexer': createSimpleLexer, 1264 'registerLangHandler': registerLangHandler, 1265 'sourceDecorator': sourceDecorator, 1266 'PR_ATTRIB_NAME': PR_ATTRIB_NAME, 1267 'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE, 1268 'PR_COMMENT': PR_COMMENT, 1269 'PR_DECLARATION': PR_DECLARATION, 1270 'PR_KEYWORD': PR_KEYWORD, 1271 'PR_LITERAL': PR_LITERAL, 1272 'PR_NOCODE': PR_NOCODE, 1273 'PR_PLAIN': PR_PLAIN, 1274 'PR_PUNCTUATION': PR_PUNCTUATION, 1275 'PR_SOURCE': PR_SOURCE, 1276 'PR_STRING': PR_STRING, 1277 'PR_TAG': PR_TAG, 1278 'PR_TYPE': PR_TYPE 1279 }; 1280})(); 1281