1 /* 2 * Copyright (C) 2013 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.doclava; 18 19 import java.io.*; 20 import java.text.BreakIterator; 21 import java.util.ArrayList; 22 import java.util.Collections; 23 import java.util.Comparator; 24 import java.util.List; 25 import java.util.regex.Pattern; 26 import java.util.regex.Matcher; 27 import java.io.File; 28 29 import com.google.clearsilver.jsilver.data.Data; 30 31 import org.ccil.cowan.tagsoup.*; 32 import org.xml.sax.XMLReader; 33 import org.xml.sax.InputSource; 34 import org.xml.sax.Attributes; 35 import org.xml.sax.helpers.DefaultHandler; 36 37 import org.w3c.dom.Node; 38 import org.w3c.dom.NodeList; 39 40 import javax.xml.transform.dom.DOMResult; 41 import javax.xml.transform.sax.SAXSource; 42 import javax.xml.transform.Transformer; 43 import javax.xml.transform.TransformerFactory; 44 import javax.xml.xpath.XPath; 45 import javax.xml.xpath.XPathConstants; 46 import javax.xml.xpath.XPathExpression; 47 import javax.xml.xpath.XPathFactory; 48 49 /** 50 * Metadata associated with a specific documentation page. Extracts 51 * metadata based on the page's declared hdf vars (meta.tags and others) 52 * as well as implicit data relating to the page, such as url, type, etc. 53 * Includes a Node class that represents the metadata and lets it attach 54 * to parent/child elements in the tree metadata nodes for all pages. 
55 * Node also includes methods for rendering the node tree to a json file 56 * in docs output, which is then used by JavaScript to load metadata 57 * objects into html pages. 58 */ 59 60 public class PageMetadata { 61 File mSource; 62 String mDest; 63 String mTagList; 64 static boolean sLowercaseTags = true; 65 static boolean sLowercaseKeywords = true; 66 //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/"; 67 /** 68 * regex pattern to match javadoc @link and similar tags. Extracts 69 * root symbol to $1. 70 */ 71 private static final Pattern JD_TAG_PATTERN = 72 Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D"); 73 PageMetadata(File source, String dest, List<Node> taglist)74 public PageMetadata(File source, String dest, List<Node> taglist) { 75 mSource = source; 76 mDest = dest; 77 78 if (dest != null) { 79 int len = dest.length(); 80 if (len > 1 && dest.charAt(len - 1) != '/') { 81 mDest = dest + '/'; 82 } else { 83 mDest = dest; 84 } 85 } 86 } 87 88 /** 89 * Given a list of metadata nodes organized by type, sort the 90 * root nodes by type name and render the types and their child 91 * metadata nodes to a json file in the out dir. 92 * 93 * @param rootTypeNodesList A list of root metadata nodes, each 94 * representing a type and it's member child pages. 
95 */ WriteList(List<Node> rootTypeNodesList)96 public static void WriteList(List<Node> rootTypeNodesList) { 97 98 Collections.sort(rootTypeNodesList, BY_TYPE_NAME); 99 Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build(); 100 101 StringBuilder buf = new StringBuilder(); 102 // write the taglist to string format 103 pageMeta.renderTypeResources(buf); 104 pageMeta.renderTypesByTag(buf); 105 // write the taglist to js file 106 Data data = Doclava.makeHDF(); 107 data.setValue("reference_tree", buf.toString()); 108 ClearPage.write(data, "jd_lists_unified.cs", "jd_lists_unified.js"); 109 } 110 111 /** 112 * Given a list of metadata nodes organized by lang, sort the 113 * root nodes by type name and render the types and their child 114 * metadata nodes to separate lang-specific json files in the out dir. 115 * 116 * @param rootNodesList A list of root metadata nodes, each 117 * representing a type and it's member child pages. 118 */ WriteListByLang(List<Node> rootNodesList)119 public static void WriteListByLang(List<Node> rootNodesList) { 120 Collections.sort(rootNodesList, BY_LANG_NAME); 121 for (Node n : rootNodesList) { 122 String langFilename = ""; 123 String langname = n.getLang(); 124 langFilename = "_" + langname; 125 Collections.sort(n.getChildren(), BY_TYPE_NAME); 126 Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build(); 127 128 StringBuilder buf = new StringBuilder(); 129 // write the taglist to string format 130 pageMeta.renderLangResources(buf,langname); 131 //pageMeta.renderTypesByTag(buf); 132 // write the taglist to js file 133 Data data = Doclava.makeHDF(); 134 data.setValue("reference_tree", buf.toString()); 135 data.setValue("metadata.lang", langname); 136 String unifiedFilename = "jd_lists_unified" + langFilename + ".js"; 137 String extrasFilename = "jd_extras" + langFilename + ".js"; 138 // write out jd_lists_unified for each lang 139 ClearPage.write(data, "jd_lists_unified.cs", 
unifiedFilename); 140 // append jd_extras to jd_lists_unified for each lang, then delete. 141 appendExtrasMetadata(extrasFilename, unifiedFilename); 142 } 143 } 144 145 /** 146 * Extract supported metadata values from a page and add them as 147 * a child node of a root node based on type. Some metadata values 148 * are normalized. Unsupported metadata fields are ignored. See 149 * Node for supported metadata fields and methods for accessing values. 150 * 151 * @param docfile The file from which to extract metadata. 152 * @param dest The output path for the file, used to set link to page. 153 * @param filename The file from which to extract metadata. 154 * @param hdf Data object in which to store the metadata values. 155 * @param tagList The file from which to extract metadata. 156 */ setPageMetadata(String docfile, String dest, String filename, Data hdf, List<Node> tagList)157 public static void setPageMetadata(String docfile, String dest, String filename, 158 Data hdf, List<Node> tagList) { 159 //exclude this page if author does not want it included 160 boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions","")); 161 162 //check whether summary and image exist and if not, get them from itemprop/markup 163 Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", "")); 164 Boolean needsImage = "".equals(hdf.getValue("page.image", "")); 165 if ((needsSummary) || (needsImage)) { 166 //try to extract the metadata from itemprop and markup 167 inferMetadata(docfile, hdf, needsSummary, needsImage); 168 } 169 170 //extract available metadata and set it in a node 171 if (!excludeNode) { 172 Node pageMeta = new Node.Builder().build(); 173 pageMeta.setLabel(getTitleNormalized(hdf, "page.title")); 174 pageMeta.setCategory(hdf.getValue("page.category","")); 175 pageMeta.setSummary(hdf.getValue("page.metaDescription","")); 176 pageMeta.setLink(getPageUrlNormalized(filename)); 177 pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group")); 178 
pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags")); 179 pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags")); 180 pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", ""))); 181 pageMeta.setLang(getLangStringNormalized(hdf, filename)); 182 pageMeta.setType(getStringValueNormalized(hdf, "page.type")); 183 pageMeta.setTimestamp(hdf.getValue("page.timestamp","")); 184 if (Doclava.USE_UPDATED_TEMPLATES) { 185 appendMetaNodeByLang(pageMeta, tagList); 186 } else { 187 appendMetaNodeByType(pageMeta, tagList); 188 } 189 } 190 } 191 192 /** 193 * Attempt to infer page metadata based on the contents of the 194 * file. Load and parse the file as a dom tree. Select values 195 * in this order: 1. dom node specifically tagged with 196 * microdata (itemprop). 2. first qualitifed p or img node. 197 * 198 * @param docfile The file from which to extract metadata. 199 * @param hdf Data object in which to store the metadata values. 200 * @param needsSummary Whether to extract summary metadata. 201 * @param needsImage Whether to extract image metadata. 202 */ inferMetadata(String docfile, Data hdf, Boolean needsSummary, Boolean needsImage)203 public static void inferMetadata(String docfile, Data hdf, 204 Boolean needsSummary, Boolean needsImage) { 205 String sum = ""; 206 String imageUrl = ""; 207 String sumFrom = needsSummary ? "none" : "hdf"; 208 String imgFrom = needsImage ? 
"none" : "hdf"; 209 String filedata = hdf.getValue("commentText", ""); 210 if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n"); 211 212 try { 213 XPathFactory xpathFac = XPathFactory.newInstance(); 214 XPath xpath = xpathFac.newXPath(); 215 InputStream inputStream = new ByteArrayInputStream(filedata.getBytes()); 216 XMLReader reader = new Parser(); 217 reader.setFeature(Parser.namespacesFeature, false); 218 reader.setFeature(Parser.namespacePrefixesFeature, false); 219 reader.setFeature(Parser.ignoreBogonsFeature, true); 220 221 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 222 DOMResult result = new DOMResult(); 223 transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result); 224 org.w3c.dom.Node htmlNode = result.getNode(); 225 226 if (needsSummary) { 227 StringBuilder sumStrings = new StringBuilder(); 228 XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*" 229 + "[@itemprop='description'][1]//text()[string(.)]"); 230 org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode, 231 XPathConstants.NODESET); 232 if (nodes.getLength() > 0) { 233 for (int i = 0; i < nodes.getLength(); i++) { 234 String tx = nodes.item(i).getNodeValue(); 235 sumStrings.append(tx); 236 sumFrom = "itemprop"; 237 } 238 } else { 239 XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../" 240 + "@class='notice-developers') and not(../@class='sidebox')" 241 + "and not(@class)]//text()"); 242 nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET); 243 if (nodes.getLength() > 0) { 244 for (int i = 0; i < nodes.getLength(); i++) { 245 String tx = nodes.item(i).getNodeValue(); 246 sumStrings.append(tx + " "); 247 sumFrom = "markup"; 248 } 249 } 250 } 251 //found a summary string, now normalize it 252 sum = sumStrings.toString().trim(); 253 if ((sum != null) && (!"".equals(sum))) { 254 sum = getSummaryNormalized(sum); 255 } 256 
//normalized summary ended up being too short to be meaningful 257 if ("".equals(sum)) { 258 if (Doclava.META_DBG) System.out.println("Warning: description too short! (" 259 + sum.length() + "chars) ...\n\n"); 260 } 261 //summary looks good, store it to the file hdf data 262 hdf.setValue("page.metaDescription", sum); 263 } 264 if (needsImage) { 265 XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src"); 266 org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode, 267 XPathConstants.NODESET); 268 if (imgNodes.getLength() > 0) { 269 imageUrl = imgNodes.item(0).getNodeValue(); 270 imgFrom = "itemprop"; 271 } else { 272 XPathExpression FirstImgExpr = xpath.compile("//img/@src"); 273 imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET); 274 if (imgNodes.getLength() > 0) { 275 //iterate nodes looking for valid image url and normalize. 276 for (int i = 0; i < imgNodes.getLength(); i++) { 277 String tx = imgNodes.item(i).getNodeValue(); 278 //qualify and normalize the image 279 imageUrl = getImageUrlNormalized(tx); 280 //this img src did not qualify, keep looking... 281 if ("".equals(imageUrl)) { 282 if (Doclava.META_DBG) System.out.println(" >>>>> Discarded image: " + tx); 283 continue; 284 } else { 285 imgFrom = "markup"; 286 break; 287 } 288 } 289 } 290 } 291 //img src url looks good, store it to the file hdf data 292 hdf.setValue("page.image", imageUrl); 293 } 294 if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl); 295 if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length() 296 + " chars\n\n" + sum + "\n"); 297 return; 298 299 } catch (Exception e) { 300 if (Doclava.META_DBG) System.out.println(" >>>>> Exception: " + e + "\n"); 301 } 302 } 303 304 /** 305 * Normalize a comma-delimited, multi-string value. 
Split on commas, remove 306 * quotes, trim whitespace, optionally make keywords/tags lowercase for 307 * easier matching. 308 * 309 * @param hdf Data object in which the metadata values are stored. 310 * @param tag The hdf var from which the metadata was extracted. 311 * @return A normalized string value for the specified tag. 312 */ getPageTagsNormalized(Data hdf, String tag)313 public static String getPageTagsNormalized(Data hdf, String tag) { 314 315 String normTags = ""; 316 StringBuilder tags = new StringBuilder(); 317 String tagList = hdf.getValue(tag, ""); 318 if (tag.equals("meta.tags") && (tagList.equals(""))) { 319 //use keywords as tags if no meta tags are available 320 tagList = hdf.getValue("page.tags", ""); 321 } 322 if (!tagList.equals("")) { 323 tagList = tagList.replaceAll("\"", ""); 324 325 String[] tagParts = tagList.split("[,\u3001]"); 326 for (int iter = 0; iter < tagParts.length; iter++) { 327 tags.append("\""); 328 if (tag.equals("meta.tags") && sLowercaseTags) { 329 tagParts[iter] = tagParts[iter].toLowerCase(); 330 } else if (tag.equals("page.tags") && sLowercaseKeywords) { 331 tagParts[iter] = tagParts[iter].toLowerCase(); 332 } 333 if (tag.equals("meta.tags")) { 334 //tags.append("#"); //to match hashtag format used with yt/blogger resources 335 tagParts[iter] = tagParts[iter].replaceAll(" ",""); 336 } 337 tags.append(tagParts[iter].trim()); 338 tags.append("\""); 339 if (iter < tagParts.length - 1) { 340 tags.append(","); 341 } 342 } 343 } 344 //write this back to hdf to expose through js 345 if (tag.equals("meta.tags")) { 346 hdf.setValue(tag, tags.toString()); 347 } 348 return tags.toString(); 349 } 350 351 /** 352 * Normalize a string for which only a single value is supported. 353 * Extract the string up to the first comma, remove quotes, remove 354 * any forward-slash prefix, trim any whitespace, optionally make 355 * lowercase for easier matching. 356 * 357 * @param hdf Data object in which the metadata values are stored. 
358 * @param tag The hdf var from which the metadata should be extracted. 359 * @return A normalized string value for the specified tag. 360 */ getStringValueNormalized(Data hdf, String tag)361 public static String getStringValueNormalized(Data hdf, String tag) { 362 StringBuilder outString = new StringBuilder(); 363 String tagList = hdf.getValue(tag, ""); 364 tagList.replaceAll("\"", ""); 365 if ("".equals(tagList)) { 366 return tagList; 367 } else { 368 int end = tagList.indexOf(","); 369 if (end != -1) { 370 tagList = tagList.substring(0,end); 371 } 372 tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList; 373 if ("sample.group".equals(tag) && sLowercaseTags) { 374 tagList = tagList.toLowerCase(); 375 } 376 outString.append(tagList.trim()); 377 return outString.toString(); 378 } 379 } 380 381 /** 382 * Normalize a page title. Extract the string, remove quotes, remove 383 * markup, and trim any whitespace. 384 * 385 * @param hdf Data object in which the metadata values are stored. 386 * @param tag The hdf var from which the metadata should be extracted. 387 * @return A normalized string value for the specified tag. 388 */ getTitleNormalized(Data hdf, String tag)389 public static String getTitleNormalized(Data hdf, String tag) { 390 StringBuilder outTitle = new StringBuilder(); 391 String title = hdf.getValue(tag, ""); 392 if (!title.isEmpty()) { 393 title = escapeString(title); 394 if (title.indexOf("<span") != -1) { 395 String[] splitTitle = title.split("<span(.*?)</span>"); 396 title = splitTitle[0]; 397 for (int j = 1; j < splitTitle.length; j++) { 398 title.concat(splitTitle[j]); 399 } 400 } 401 outTitle.append(title.trim()); 402 } 403 return outTitle.toString(); 404 } 405 406 /** 407 * Extract and normalize a page's language string based on the 408 * lowercased dir path. Non-supported langs are ignored and assigned 409 * the default lang string of "en". 410 * 411 * @param filename A path string to the file relative to root. 
412 * @return A normalized lang value. 413 */ getLangStringNormalized(Data data, String filename)414 public static String getLangStringNormalized(Data data, String filename) { 415 String[] stripStr = filename.toLowerCase().split("\\/", 3); 416 String outFrag = "en"; 417 String pathCanonical = filename; 418 if (stripStr.length > 0) { 419 for (String t : DocFile.DEVSITE_VALID_LANGS) { 420 if ("intl".equals(stripStr[0])) { 421 if (t.equals(stripStr[1])) { 422 outFrag = stripStr[1]; 423 //extract the root url (exclusive of intl/nn) 424 pathCanonical = stripStr[2]; 425 break; 426 } 427 } 428 } 429 } 430 //extract the root url (exclusive of intl/nn) 431 data.setValue("path.canonical", pathCanonical); 432 return outFrag; 433 } 434 435 /** 436 * Normalize a page summary string and truncate as needed. Strings 437 * exceeding max_chars are truncated at the first word boundary 438 * following the max_size marker. Strings smaller than min_chars 439 * are discarded (as they are assumed to be too little context). 440 * 441 * @param s String extracted from the page as it's summary. 442 * @return A normalized string value. 
443 */ getSummaryNormalized(String s)444 public static String getSummaryNormalized(String s) { 445 String str = ""; 446 int max_chars = 250; 447 int min_chars = 50; 448 int marker = 0; 449 if (s.length() < min_chars) { 450 return str; 451 } else { 452 str = s.replaceAll("^\"|\"$", ""); 453 str = str.replaceAll("\\s+", " "); 454 str = JD_TAG_PATTERN.matcher(str).replaceAll("$1"); 455 str = escapeString(str); 456 BreakIterator bi = BreakIterator.getWordInstance(); 457 bi.setText(str); 458 if (str.length() > max_chars) { 459 marker = bi.following(max_chars); 460 } else { 461 marker = bi.last(); 462 } 463 str = str.substring(0, marker); 464 str = str.concat("\u2026" ); 465 } 466 return str; 467 } 468 escapeString(String s)469 public static String escapeString(String s) { 470 s = s.replaceAll("\"", """); 471 s = s.replaceAll("\'", "'"); 472 s = s.replaceAll("<", "<"); 473 s = s.replaceAll(">", ">"); 474 s = s.replaceAll("/", "/"); 475 return s; 476 } 477 478 //Disqualify img src urls that include these substrings 479 public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo", 480 "icon_play.png", "robot-tiny"}; 481 inList(String s, String[] list)482 public static boolean inList(String s, String[] list) { 483 for (String t : list) { 484 if (s.contains(t)) { 485 return true; 486 } 487 } 488 return false; 489 } 490 491 /** 492 * Normalize an img src url by removing docRoot and leading 493 * slash for local image references. These are added later 494 * in js to support offline mode and keep path reference 495 * format consistent with hrefs. 496 * 497 * @param url Abs or rel url sourced from img src. 
498 * @return Normalized url if qualified, else empty 499 */ getImageUrlNormalized(String url)500 public static String getImageUrlNormalized(String url) { 501 String absUrl = ""; 502 // validate to avoid choosing using specific images 503 if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) { 504 absUrl = url.replace("{@docRoot}", ""); 505 absUrl = absUrl.replaceFirst("^/(?!/)", ""); 506 } 507 return absUrl; 508 } 509 510 /** 511 * Normalize an href url by removing docRoot and leading 512 * slash for local image references. These are added later 513 * in js to support offline mode and keep path reference 514 * format consistent with hrefs. 515 * 516 * @param url Abs or rel page url sourced from href 517 * @return Normalized url, either abs or rel to root 518 */ getPageUrlNormalized(String url)519 public static String getPageUrlNormalized(String url) { 520 String absUrl = ""; 521 522 if ((url !=null) && (!url.equals(""))) { 523 absUrl = url.replace("{@docRoot}", ""); 524 if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) { 525 absUrl = absUrl.replaceFirst("^en/", ""); 526 } 527 absUrl = absUrl.replaceFirst("^/(?!/)", ""); 528 } 529 return absUrl; 530 } 531 532 /** 533 * Given a metadata node, add it as a child of a root node based on its 534 * type. If there is no root node that matches the node's type, create one 535 * and add the metadata node as a child node. 536 * 537 * @param gNode The node to attach to a root node or add as a new root node. 538 * @param rootList The current list of root nodes. 539 * @return The updated list of root nodes. 
540 */ appendMetaNodeByLang(Node gNode, List<Node> rootList)541 public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) { 542 543 String nodeLang = gNode.getLang(); 544 boolean matched = false; 545 for (Node n : rootList) { 546 if (n.getLang().equals(nodeLang)) { //find any matching lang node 547 appendMetaNodeByType(gNode,n.getChildren()); 548 //n.getChildren().add(gNode); 549 matched = true; 550 break; // add to the first root node only 551 } // tag did not match 552 } // end rootnodes matching iterator 553 if (!matched) { 554 List<Node> mlangList = new ArrayList<Node>(); // list of file objects that have a given lang 555 //mlangList.add(gNode); 556 Node tnode = new Node.Builder().setChildren(mlangList).setLang(nodeLang).build(); 557 rootList.add(tnode); 558 appendMetaNodeByType(gNode, mlangList); 559 } 560 return rootList; 561 } 562 563 /** 564 * Given a metadata node, add it as a child of a root node based on its 565 * type. If there is no root node that matches the node's type, create one 566 * and add the metadata node as a child node. 567 * 568 * @param gNode The node to attach to a root node or add as a new root node. 569 * @param rootList The current list of root nodes. 570 * @return The updated list of root nodes. 
571 */ appendMetaNodeByType(Node gNode, List<Node> rootList)572 public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) { 573 574 String nodeTags = gNode.getType(); 575 boolean matched = false; 576 for (Node n : rootList) { 577 if (n.getType().equals(nodeTags)) { //find any matching type node 578 n.getChildren().add(gNode); 579 matched = true; 580 break; // add to the first root node only 581 } // tag did not match 582 } // end rootnodes matching iterator 583 if (!matched) { 584 List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type 585 mtaglist.add(gNode); 586 Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build(); 587 rootList.add(tnode); 588 } 589 return rootList; 590 } 591 592 /** 593 * Given a metadata node, add it as a child of a root node based on its 594 * tag. If there is no root node matching the tag, create one for it 595 * and add the metadata node as a child node. 596 * 597 * @param gNode The node to attach to a root node or add as a new root node. 598 * @param rootTagNodesList The current list of root nodes. 599 * @return The updated list of root nodes. 
600 */ appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList)601 public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) { 602 603 for (int iter = 0; iter < gNode.getChildren().size(); iter++) { 604 if (gNode.getChildren().get(iter).getTags() != null) { 605 List<String> nodeTags = gNode.getChildren().get(iter).getTags(); 606 boolean matched = false; 607 for (String t : nodeTags) { //process each of the meta.tags 608 for (Node n : rootTagNodesList) { 609 if (n.getLabel().equals(t.toString())) { 610 n.getTags().add(String.valueOf(iter)); 611 matched = true; 612 break; // add to the first root node only 613 } // tag did not match 614 } // end rootnodes matching iterator 615 if (!matched) { 616 List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag 617 mtaglist.add(String.valueOf(iter)); 618 Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build(); 619 rootTagNodesList.add(tnode); 620 } 621 } 622 } 623 } 624 return rootTagNodesList; 625 } 626 627 /** 628 * Append the contents of jd_extras to jd_lists_unified for each language. 629 * 630 * @param extrasFilename The lang-specific extras file to append. 631 * @param unifiedFilename The lang-specific unified metadata file. 
632 */ appendExtrasMetadata(String extrasFilename, String unifiedFilename)633 public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) { 634 635 File f = new File(ClearPage.outputDir + "/" + extrasFilename); 636 if (f.exists() && !f.isDirectory()) { 637 ClearPage.copyFile(true, f, unifiedFilename, true); 638 try { 639 if (f.delete()) { 640 if (Doclava.META_DBG) System.out.println(" >>>>> Delete succeeded"); 641 } else { 642 if (Doclava.META_DBG) System.out.println(" >>>>> Delete failed"); 643 } 644 } catch (Exception e) { 645 if (Doclava.META_DBG) System.out.println(" >>>>> Exception: " + e + "\n"); 646 } 647 } 648 } 649 650 public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() { 651 public int compare (Node one, Node other) { 652 return one.getLabel().compareTo(other.getLabel()); 653 } 654 }; 655 656 public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() { 657 public int compare (Node one, Node other) { 658 return one.getType().compareTo(other.getType()); 659 } 660 }; 661 662 public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() { 663 public int compare (Node one, Node other) { 664 return one.getLang().compareTo(other.getLang()); 665 } 666 }; 667 668 /** 669 * A node for storing page metadata. Use Builder.build() to instantiate. 
670 */ 671 public static class Node { 672 673 private String mLabel; // holds page.title or similar identifier 674 private String mCategory; // subtabs, example 'training' 'guides' 675 private String mSummary; // Summary for card or similar use 676 private String mLink; //link href for item click 677 private String mGroup; // from sample.group in _index.jd 678 private List<String> mKeywords; // from page.tags 679 private List<String> mTags; // from meta.tags 680 private String mImage; // holds an href, fully qualified or relative to root 681 private List<Node> mChildren; 682 private String mLang; 683 private String mType; // design, develop, distribute, youtube, blog, etc 684 private String mTimestamp; // optional timestamp eg 1447452827 685 Node(Builder builder)686 private Node(Builder builder) { 687 mLabel = builder.mLabel; 688 mCategory = builder.mCategory; 689 mSummary = builder.mSummary; 690 mLink = builder.mLink; 691 mGroup = builder.mGroup; 692 mKeywords = builder.mKeywords; 693 mTags = builder.mTags; 694 mImage = builder.mImage; 695 mChildren = builder.mChildren; 696 mLang = builder.mLang; 697 mType = builder.mType; 698 mTimestamp = builder.mTimestamp; 699 } 700 701 private static class Builder { 702 private String mLabel, mCategory, mSummary, mLink, mGroup, mImage, mLang, mType, mTimestamp; 703 private List<String> mKeywords = null; 704 private List<String> mTags = null; 705 private List<Node> mChildren = null; setLabel(String mLabel)706 public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;} setCategory(String mCategory)707 public Builder setCategory(String mCategory) { 708 this.mCategory = mCategory; return this; 709 } setSummary(String mSummary)710 public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;} setLink(String mLink)711 public Builder setLink(String mLink) {this.mLink = mLink; return this;} setGroup(String mGroup)712 public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;} 
setKeywords(List<String> mKeywords)713 public Builder setKeywords(List<String> mKeywords) { 714 this.mKeywords = mKeywords; return this; 715 } setTags(List<String> mTags)716 public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;} setImage(String mImage)717 public Builder setImage(String mImage) {this.mImage = mImage; return this;} setChildren(List<Node> mChildren)718 public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;} setLang(String mLang)719 public Builder setLang(String mLang) {this.mLang = mLang; return this;} setType(String mType)720 public Builder setType(String mType) {this.mType = mType; return this;} setTimestamp(String mTimestamp)721 public Builder setTimestamp(String mTimestamp) {this.mTimestamp = mTimestamp; return this;} build()722 public Node build() {return new Node(this);} 723 } 724 725 /** 726 * Render a tree of metadata nodes organized by type. 727 * @param buf Output buffer to render to. 728 */ renderTypeResources(StringBuilder buf)729 void renderTypeResources(StringBuilder buf) { 730 List<Node> list = mChildren; //list of type rootnodes 731 if (list == null || list.size() == 0) { 732 buf.append("null"); 733 } else { 734 final int n = list.size(); 735 for (int i = 0; i < n; i++) { 736 buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = ["); 737 list.get(i).renderTypes(buf); //render this type's children 738 buf.append("\n];\n\n"); 739 } 740 } 741 } 742 743 /** 744 * Render a tree of metadata nodes organized by lang. 745 * @param buf Output buffer to render to. 746 */ renderLangResources(StringBuilder buf, String langname)747 void renderLangResources(StringBuilder buf, String langname) { 748 List<Node> list = mChildren; //list of type rootnodes 749 if (list == null || list.size() == 0) { 750 buf.append("null"); 751 } else { 752 final int n = list.size(); 753 for (int i = 0; i < n; i++) { 754 buf.append("METADATA['" + langname + "']." 
+ list.get(i).mType + " = ["); 755 list.get(i).renderTypes(buf); //render this lang's children 756 buf.append("\n];\n\n"); 757 } 758 } 759 } 760 /** 761 * Render all metadata nodes for a specific type. 762 * @param buf Output buffer to render to. 763 */ renderTypes(StringBuilder buf)764 void renderTypes(StringBuilder buf) { 765 List<Node> list = mChildren; 766 if (list == null || list.size() == 0) { 767 buf.append("nulltype"); 768 } else { 769 final int n = list.size(); 770 for (int i = 0; i < n; i++) { 771 buf.append("\n {\n"); 772 buf.append(" \"title\":\""); 773 renderStrWithUcs(buf, list.get(i).mLabel); 774 buf.append("\",\n" ); 775 buf.append(" \"summary\":\""); 776 renderStrWithUcs(buf, list.get(i).mSummary); 777 buf.append("\",\n" ); 778 buf.append(" \"url\":\"" + list.get(i).mLink + "\",\n" ); 779 if (!"".equals(list.get(i).mImage)) { 780 buf.append(" \"image\":\"" + list.get(i).mImage + "\",\n" ); 781 } 782 if (!"".equals(list.get(i).mGroup)) { 783 buf.append(" \"group\":\""); 784 renderStrWithUcs(buf, list.get(i).mGroup); 785 buf.append("\",\n" ); 786 } 787 if (!"".equals(list.get(i).mCategory)) { 788 buf.append(" \"category\":\"" + list.get(i).mCategory + "\",\n" ); 789 } 790 if ((list.get(i).mType != null) && (list.get(i).mType != "")) { 791 buf.append(" \"type\":\"" + list.get(i).mType + "\",\n"); 792 } 793 list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords"); 794 list.get(i).renderArrayType(buf, list.get(i).mTags, "tags"); 795 if (!"".equals(list.get(i).mTimestamp)) { 796 buf.append(" \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n"); 797 } 798 buf.append(" \"lang\":\"" + list.get(i).mLang + "\"" ); 799 buf.append("\n }"); 800 if (i != n - 1) { 801 buf.append(", "); 802 } 803 } 804 } 805 } 806 807 /** 808 * Build and render a list of tags associated with each type. 809 * @param buf Output buffer to render to. 
810 */ renderTypesByTag(StringBuilder buf)811 void renderTypesByTag(StringBuilder buf) { 812 List<Node> list = mChildren; //list of rootnodes 813 if (list == null || list.size() == 0) { 814 buf.append("null"); 815 } else { 816 final int n = list.size(); 817 for (int i = 0; i < n; i++) { 818 buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {"); 819 List<Node> mTagList = new ArrayList(); //list of rootnodes 820 mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList); 821 list.get(i).renderTagIndices(buf, mTagList); 822 buf.append("\n};\n\n"); 823 } 824 } 825 } 826 827 /** 828 * Render a list of tags associated with a type, including the 829 * tag's indices in the type array. 830 * @param buf Output buffer to render to. 831 * @param tagList Node tree of types to render. 832 */ renderTagIndices(StringBuilder buf, List<Node> tagList)833 void renderTagIndices(StringBuilder buf, List<Node> tagList) { 834 List<Node> list = tagList; 835 if (list == null || list.size() == 0) { 836 buf.append(""); 837 } else { 838 final int n = list.size(); 839 for (int i = 0; i < n; i++) { 840 buf.append("\n " + list.get(i).mLabel + ":["); 841 renderArrayValue(buf, list.get(i).mTags); 842 buf.append("]"); 843 if (i != n - 1) { 844 buf.append(", "); 845 } 846 } 847 } 848 } 849 850 /** 851 * Render key:arrayvalue pair. 852 * @param buf Output buffer to render to. 853 * @param type The list value to render as an arrayvalue. 854 * @param key The key for the pair. 855 */ renderArrayType(StringBuilder buf, List<String> type, String key)856 void renderArrayType(StringBuilder buf, List<String> type, String key) { 857 buf.append(" \"" + key + "\": ["); 858 renderArrayValue(buf, type); 859 buf.append("],\n"); 860 } 861 862 /** 863 * Render an array value to buf, with special handling of unicode characters. 864 * @param buf Output buffer to render to. 865 * @param type The list value to render as an arrayvalue. 
*/
    void renderArrayValue(StringBuilder buf, List<String> type) {
      if (type == null) {
        return;
      }
      final int n = type.size();
      for (int i = 0; i < n; i++) {
        // Elements are written as-is (no quotes are added here).
        renderStrWithUcs(buf, type.get(i));
        if (i != n - 1) {
          buf.append(",");
        }
      }
    }

    /**
     * Render a string that can include ucs2 encoded characters.
     *
     * <p>Printable ASCII characters (space through {@code ~}) other than the
     * backslash are appended unchanged; note that this includes the double
     * quote. Every other UTF-16 code unit is appended as a four-digit
     * {@code \}{@code uXXXX} escape. A character outside the Basic
     * Multilingual Plane is therefore written as two escapes (its surrogate
     * pair), which is the form JavaScript and JSON string escapes require;
     * a single escape with five hex digits would be read as a four-digit
     * escape followed by a stray digit.
     *
     * @param buf Output buffer to render to.
     * @param chars String to append to buf with any necessary encoding.
     *     A null value appends nothing.
     */
    void renderStrWithUcs(StringBuilder buf, String chars) {
      if (chars == null) {
        return;
      }
      final int length = chars.length();
      for (int i = 0; i < length; i++) {
        final char c = chars.charAt(i);
        if (c >= ' ' && c <= '~' && c != '\\') {
          buf.append(c);
        } else {
          // Escape per UTF-16 code unit so surrogate pairs stay well formed.
          buf.append(String.format("\\u%04x", (int) c));
        }
      }
    }

    /** Returns this node's label. */
    public String getLabel() {
      return mLabel;
    }

    /** Sets this node's label. */
    public void setLabel(String label) {
      mLabel = label;
    }

    /** Returns this node's category. */
    public String getCategory() {
      return mCategory;
    }

    /** Sets this node's category. */
    public void setCategory(String title) {
      mCategory = title;
    }

    /** Returns this node's summary. */
    public String getSummary() {
      return mSummary;
    }

    /** Sets this node's summary. */
    public void setSummary(String summary) {
      mSummary = summary;
    }

    /** Returns this node's link. */
    public String getLink() {
      return mLink;
    }

    /** Sets this node's link. */
    public void setLink(String ref) {
      mLink = ref;
    }
/** Returns this node's group. */
    public String getGroup() {
      return mGroup;
    }

    /** Sets this node's group. */
    public void setGroup(String group) {
      mGroup = group;
    }

    /** Returns the tag list, or null when no tags have been set. */
    public List<String> getTags() {
      return mTags;
    }

    /**
     * Sets the tags from a comma-separated string.
     *
     * @param tags Comma-separated tag values. A null or empty string clears
     *     the tag list (sets it to null). Parts are stored exactly as split;
     *     they are not trimmed.
     */
    public void setTags(String tags) {
      if (tags == null || tags.isEmpty()) {
        mTags = null;
        return;
      }
      List<String> tagList = new ArrayList<String>();
      Collections.addAll(tagList, tags.split(","));
      mTags = tagList;
    }

    /** Returns the keyword list, or null when no keywords have been set. */
    public List<String> getKeywords() {
      return mKeywords;
    }

    /**
     * Sets the keywords from a comma-separated string.
     *
     * @param keywords Comma-separated keyword values. A null or empty string
     *     clears the keyword list (sets it to null). Parts are stored exactly
     *     as split; they are not trimmed.
     */
    public void setKeywords(String keywords) {
      if (keywords == null || keywords.isEmpty()) {
        mKeywords = null;
        return;
      }
      List<String> keywordList = new ArrayList<String>();
      Collections.addAll(keywordList, keywords.split(","));
      mKeywords = keywordList;
    }

    /** Returns this node's image reference. */
    public String getImage() {
      return mImage;
    }

    /** Sets this node's image reference. */
    public void setImage(String ref) {
      mImage = ref;
    }

    /** Returns this node's child nodes. */
    public List<Node> getChildren() {
      return mChildren;
    }

    /** Sets this node's child nodes. */
    public void setChildren(List<Node> node) {
      mChildren = node;
    }

    /** Returns this node's language. */
    public String getLang() {
      return mLang;
    }

    /** Sets this node's language. */
    public void setLang(String lang) {
      mLang = lang;
    }

    /** Returns this node's type. */
    public String getType() {
      return mType;
    }

    /** Returns this node's timestamp. */
    public String getTimestamp() {
      return mTimestamp;
    }

    /** Sets this node's type. */
    public void setType(String type) {
      mType = type;
    }

    /** Sets this node's timestamp. */
    public void setTimestamp(String timestamp) {
      mTimestamp = timestamp;
    }
  }
}