1 /*
2  * Copyright (C) 2013 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.doclava;
18 
19 import java.io.*;
20 import java.text.BreakIterator;
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.Comparator;
24 import java.util.List;
25 import java.util.regex.Pattern;
26 import java.util.regex.Matcher;
27 import java.io.File;
28 
29 import com.google.clearsilver.jsilver.data.Data;
30 
31 import org.ccil.cowan.tagsoup.*;
32 import org.xml.sax.XMLReader;
33 import org.xml.sax.InputSource;
34 import org.xml.sax.Attributes;
35 import org.xml.sax.helpers.DefaultHandler;
36 
37 import org.w3c.dom.Node;
38 import org.w3c.dom.NodeList;
39 
40 import javax.xml.transform.dom.DOMResult;
41 import javax.xml.transform.sax.SAXSource;
42 import javax.xml.transform.Transformer;
43 import javax.xml.transform.TransformerFactory;
44 import javax.xml.xpath.XPath;
45 import javax.xml.xpath.XPathConstants;
46 import javax.xml.xpath.XPathExpression;
47 import javax.xml.xpath.XPathFactory;
48 
49 /**
50 * Metadata associated with a specific documentation page. Extracts
51 * metadata based on the page's declared hdf vars (meta.tags and others)
52 * as well as implicit data relating to the page, such as url, type, etc.
53 * Includes a Node class that represents the metadata and lets it attach
54 * to parent/child elements in the tree metadata nodes for all pages.
55 * Node also includes methods for rendering the node tree to a json file
56 * in docs output, which is then used by JavaScript to load metadata
57 * objects into html pages.
58 */
59 
60 public class PageMetadata {
  // Source file for this page; assigned by the constructor.
  File mSource;
  // Output path for this page; the constructor appends a trailing '/' when
  // the given path is longer than one character and lacks one.
  String mDest;
  // Not assigned by the constructor.
  String mTagList;
  // When true, meta.tags values (and the sample.group value) are lowercased
  // during normalization.
  static boolean sLowercaseTags = true;
  // When true, page.tags (keyword) values are lowercased during normalization.
  static boolean sLowercaseKeywords = true;
  //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/";
  /**
   * regex pattern to match javadoc @link and similar tags. Extracts
   * root symbol to $1. The root symbol is the last run of letters, digits,
   * underscores and parentheses that directly precedes the closing brace and
   * follows whitespace, a dot or a hash sign.
   */
  private static final Pattern JD_TAG_PATTERN =
      Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
73 
PageMetadata(File source, String dest, List<Node> taglist)74   public PageMetadata(File source, String dest, List<Node> taglist) {
75     mSource = source;
76     mDest = dest;
77 
78     if (dest != null) {
79       int len = dest.length();
80       if (len > 1 && dest.charAt(len - 1) != '/') {
81         mDest = dest + '/';
82       } else {
83         mDest = dest;
84       }
85     }
86   }
87 
88   /**
89   * Given a list of metadata nodes organized by type, sort the
90   * root nodes by type name and render the types and their child
91   * metadata nodes to a json file in the out dir.
92   *
  * @param rootTypeNodesList A list of root metadata nodes, each
  *        representing a type and its member child pages.
95   */
WriteList(List<Node> rootTypeNodesList)96   public static void WriteList(List<Node> rootTypeNodesList) {
97 
98     Collections.sort(rootTypeNodesList, BY_TYPE_NAME);
99     Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build();
100 
101     StringBuilder buf = new StringBuilder();
102     // write the taglist to string format
103     pageMeta.renderTypeResources(buf);
104     pageMeta.renderTypesByTag(buf);
105     // write the taglist to js file
106     Data data = Doclava.makeHDF();
107     data.setValue("reference_tree", buf.toString());
108     ClearPage.write(data, "jd_lists_unified.cs", "jd_lists_unified.js");
109   }
110 
111   /**
  * Given a list of metadata nodes organized by lang, sort the
  * root nodes by lang name, sort each lang's child nodes by type name,
  * and render them to separate lang-specific json files in the out dir.
  *
  * @param rootNodesList A list of root metadata nodes, each
  *        representing a lang and its per-type child nodes.
118   */
WriteListByLang(List<Node> rootNodesList)119   public static void WriteListByLang(List<Node> rootNodesList) {
120     Collections.sort(rootNodesList, BY_LANG_NAME);
121     for (Node n : rootNodesList) {
122       String langFilename = "";
123       String langname = n.getLang();
124       langFilename = "_" + langname;
125       Collections.sort(n.getChildren(), BY_TYPE_NAME);
126       Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build();
127 
128       StringBuilder buf = new StringBuilder();
129       // write the taglist to string format
130       pageMeta.renderLangResources(buf,langname);
131       //pageMeta.renderTypesByTag(buf);
132       // write the taglist to js file
133       Data data = Doclava.makeHDF();
134       data.setValue("reference_tree", buf.toString());
135       data.setValue("metadata.lang", langname);
136       String unifiedFilename = "jd_lists_unified" + langFilename + ".js";
137       String extrasFilename = "jd_extras" + langFilename + ".js";
138       // write out jd_lists_unified for each lang
139       ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename);
140       // append jd_extras to jd_lists_unified for each lang, then delete.
141       appendExtrasMetadata(extrasFilename, unifiedFilename);
142     }
143   }
144 
145   /**
146   * Extract supported metadata values from a page and add them as
147   * a child node of a root node based on type. Some metadata values
148   * are normalized. Unsupported metadata fields are ignored. See
149   * Node for supported metadata fields and methods for accessing values.
150   *
  * @param docfile The file from which to extract metadata.
  * @param dest The output path for the file (not read by this method).
  * @param filename The path of the page relative to root, used to set
  *        the link to the page and to derive its lang.
  * @param hdf Data object that holds the page's declared metadata values
  *        and receives any inferred ones.
  * @param tagList The list of root metadata nodes to which the page's
  *        node is added.
156   */
setPageMetadata(String docfile, String dest, String filename, Data hdf, List<Node> tagList)157   public static void setPageMetadata(String docfile, String dest, String filename,
158       Data hdf, List<Node> tagList) {
159     //exclude this page if author does not want it included
160     boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
161 
162     //check whether summary and image exist and if not, get them from itemprop/markup
163     Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
164     Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
165     if ((needsSummary) || (needsImage)) {
166       //try to extract the metadata from itemprop and markup
167       inferMetadata(docfile, hdf, needsSummary, needsImage);
168     }
169 
170     //extract available metadata and set it in a node
171     if (!excludeNode) {
172       Node pageMeta = new Node.Builder().build();
173       pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
174       pageMeta.setCategory(hdf.getValue("page.category",""));
175       pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
176       pageMeta.setLink(getPageUrlNormalized(filename));
177       pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
178       pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
179       pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
180       pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
181       pageMeta.setLang(getLangStringNormalized(hdf, filename));
182       pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
183       pageMeta.setTimestamp(hdf.getValue("page.timestamp",""));
184       if (Doclava.USE_UPDATED_TEMPLATES) {
185         appendMetaNodeByLang(pageMeta, tagList);
186       } else {
187         appendMetaNodeByType(pageMeta, tagList);
188       }
189     }
190   }
191 
192   /**
193   * Attempt to infer page metadata based on the contents of the
194   * file. Load and parse the file as a dom tree. Select values
  * in this order: 1. dom node specifically tagged with
  * microdata (itemprop). 2. qualified p nodes (their text is concatenated
  * and later truncated) or the first qualified img node.
197   *
198   * @param docfile The file from which to extract metadata.
199   * @param hdf Data object in which to store the metadata values.
200   * @param needsSummary Whether to extract summary metadata.
201   * @param needsImage Whether to extract image metadata.
202   */
inferMetadata(String docfile, Data hdf, Boolean needsSummary, Boolean needsImage)203   public static void inferMetadata(String docfile, Data hdf,
204       Boolean needsSummary, Boolean needsImage) {
205     String sum = "";
206     String imageUrl = "";
207     String sumFrom = needsSummary ? "none" : "hdf";
208     String imgFrom = needsImage ? "none" : "hdf";
209     String filedata = hdf.getValue("commentText", "");
210     if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n");
211 
212     try {
213       XPathFactory xpathFac = XPathFactory.newInstance();
214       XPath xpath = xpathFac.newXPath();
215       InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
216       XMLReader reader = new Parser();
217       reader.setFeature(Parser.namespacesFeature, false);
218       reader.setFeature(Parser.namespacePrefixesFeature, false);
219       reader.setFeature(Parser.ignoreBogonsFeature, true);
220 
221       Transformer transformer = TransformerFactory.newInstance().newTransformer();
222       DOMResult result = new DOMResult();
223       transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
224       org.w3c.dom.Node htmlNode = result.getNode();
225 
226       if (needsSummary) {
227         StringBuilder sumStrings = new StringBuilder();
228         XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
229             + "[@itemprop='description'][1]//text()[string(.)]");
230         org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
231             XPathConstants.NODESET);
232         if (nodes.getLength() > 0) {
233           for (int i = 0; i < nodes.getLength(); i++) {
234             String tx = nodes.item(i).getNodeValue();
235             sumStrings.append(tx);
236             sumFrom = "itemprop";
237           }
238         } else {
239           XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
240               + "@class='notice-developers') and not(../@class='sidebox')"
241               + "and not(@class)]//text()");
242           nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
243           if (nodes.getLength() > 0) {
244             for (int i = 0; i < nodes.getLength(); i++) {
245               String tx = nodes.item(i).getNodeValue();
246               sumStrings.append(tx + " ");
247               sumFrom = "markup";
248             }
249           }
250         }
251         //found a summary string, now normalize it
252         sum = sumStrings.toString().trim();
253         if ((sum != null) && (!"".equals(sum))) {
254           sum = getSummaryNormalized(sum);
255         }
256         //normalized summary ended up being too short to be meaningful
257         if ("".equals(sum)) {
258            if (Doclava.META_DBG) System.out.println("Warning: description too short! ("
259             + sum.length() + "chars) ...\n\n");
260         }
261         //summary looks good, store it to the file hdf data
262         hdf.setValue("page.metaDescription", sum);
263       }
264       if (needsImage) {
265         XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
266         org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
267             XPathConstants.NODESET);
268         if (imgNodes.getLength() > 0) {
269           imageUrl = imgNodes.item(0).getNodeValue();
270           imgFrom = "itemprop";
271         } else {
272           XPathExpression FirstImgExpr = xpath.compile("//img/@src");
273           imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
274           if (imgNodes.getLength() > 0) {
275             //iterate nodes looking for valid image url and normalize.
276             for (int i = 0; i < imgNodes.getLength(); i++) {
277               String tx = imgNodes.item(i).getNodeValue();
278               //qualify and normalize the image
279               imageUrl = getImageUrlNormalized(tx);
280               //this img src did not qualify, keep looking...
281               if ("".equals(imageUrl)) {
282                 if (Doclava.META_DBG) System.out.println("    >>>>> Discarded image: " + tx);
283                 continue;
284               } else {
285                 imgFrom = "markup";
286                 break;
287               }
288             }
289           }
290         }
291         //img src url looks good, store it to the file hdf data
292         hdf.setValue("page.image", imageUrl);
293       }
294       if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
295       if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length()
296           + " chars\n\n" + sum + "\n");
297       return;
298 
299     } catch (Exception e) {
300       if (Doclava.META_DBG) System.out.println("    >>>>> Exception: " + e + "\n");
301     }
302   }
303 
304   /**
305   * Normalize a comma-delimited, multi-string value. Split on commas, remove
306   * quotes, trim whitespace, optionally make keywords/tags lowercase for
307   * easier matching.
308   *
309   * @param hdf Data object in which the metadata values are stored.
310   * @param tag The hdf var from which the metadata was extracted.
311   * @return A normalized string value for the specified tag.
312   */
getPageTagsNormalized(Data hdf, String tag)313   public static String getPageTagsNormalized(Data hdf, String tag) {
314 
315     String normTags = "";
316     StringBuilder tags = new StringBuilder();
317     String tagList = hdf.getValue(tag, "");
318     if (tag.equals("meta.tags") && (tagList.equals(""))) {
319       //use keywords as tags if no meta tags are available
320       tagList = hdf.getValue("page.tags", "");
321     }
322     if (!tagList.equals("")) {
323       tagList = tagList.replaceAll("\"", "");
324 
325       String[] tagParts = tagList.split("[,\u3001]");
326       for (int iter = 0; iter < tagParts.length; iter++) {
327         tags.append("\"");
328         if (tag.equals("meta.tags") && sLowercaseTags) {
329           tagParts[iter] = tagParts[iter].toLowerCase();
330         } else if (tag.equals("page.tags") && sLowercaseKeywords) {
331           tagParts[iter] = tagParts[iter].toLowerCase();
332         }
333         if (tag.equals("meta.tags")) {
334           //tags.append("#"); //to match hashtag format used with yt/blogger resources
335           tagParts[iter] = tagParts[iter].replaceAll(" ","");
336         }
337         tags.append(tagParts[iter].trim());
338         tags.append("\"");
339         if (iter < tagParts.length - 1) {
340           tags.append(",");
341         }
342       }
343     }
344     //write this back to hdf to expose through js
345     if (tag.equals("meta.tags")) {
346       hdf.setValue(tag, tags.toString());
347     }
348     return tags.toString();
349   }
350 
351   /**
352   * Normalize a string for which only a single value is supported.
353   * Extract the string up to the first comma, remove quotes, remove
354   * any forward-slash prefix, trim any whitespace, optionally make
355   * lowercase for easier matching.
356   *
357   * @param hdf Data object in which the metadata values are stored.
358   * @param tag The hdf var from which the metadata should be extracted.
359   * @return A normalized string value for the specified tag.
360   */
getStringValueNormalized(Data hdf, String tag)361   public static String getStringValueNormalized(Data hdf, String tag) {
362     StringBuilder outString =  new StringBuilder();
363     String tagList = hdf.getValue(tag, "");
364     tagList.replaceAll("\"", "");
365     if ("".equals(tagList)) {
366       return tagList;
367     } else {
368       int end = tagList.indexOf(",");
369       if (end != -1) {
370         tagList = tagList.substring(0,end);
371       }
372       tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList;
373       if ("sample.group".equals(tag) && sLowercaseTags) {
374         tagList = tagList.toLowerCase();
375       }
376       outString.append(tagList.trim());
377       return outString.toString();
378     }
379   }
380 
381   /**
382   * Normalize a page title. Extract the string, remove quotes, remove
383   * markup, and trim any whitespace.
384   *
385   * @param hdf Data object in which the metadata values are stored.
386   * @param tag The hdf var from which the metadata should be extracted.
387   * @return A normalized string value for the specified tag.
388   */
getTitleNormalized(Data hdf, String tag)389   public static String getTitleNormalized(Data hdf, String tag) {
390     StringBuilder outTitle =  new StringBuilder();
391     String title = hdf.getValue(tag, "");
392     if (!title.isEmpty()) {
393       title = escapeString(title);
394       if (title.indexOf("<span") != -1) {
395         String[] splitTitle = title.split("<span(.*?)</span>");
396         title = splitTitle[0];
397         for (int j = 1; j < splitTitle.length; j++) {
398           title.concat(splitTitle[j]);
399         }
400       }
401       outTitle.append(title.trim());
402     }
403     return outTitle.toString();
404   }
405 
406   /**
  * Extract and normalize a page's language string based on the
  * lowercased dir path. Non-supported langs are ignored and assigned
  * the default lang string of "en". Also stores the page's path,
  * exclusive of any intl/lang prefix, in the "path.canonical" var.
  *
  * @param data Data object that receives the "path.canonical" value.
  * @param filename A path string to the file relative to root.
  * @return A normalized lang value.
413   */
getLangStringNormalized(Data data, String filename)414   public static String getLangStringNormalized(Data data, String filename) {
415     String[] stripStr = filename.toLowerCase().split("\\/", 3);
416     String outFrag = "en";
417     String pathCanonical = filename;
418     if (stripStr.length > 0) {
419       for (String t : DocFile.DEVSITE_VALID_LANGS) {
420         if ("intl".equals(stripStr[0])) {
421           if (t.equals(stripStr[1])) {
422             outFrag = stripStr[1];
423             //extract the root url (exclusive of intl/nn)
424             pathCanonical = stripStr[2];
425             break;
426           }
427         }
428       }
429     }
430     //extract the root url (exclusive of intl/nn)
431     data.setValue("path.canonical", pathCanonical);
432     return outFrag;
433   }
434 
435   /**
436   * Normalize a page summary string and truncate as needed. Strings
437   * exceeding max_chars are truncated at the first word boundary
438   * following the max_size marker. Strings smaller than min_chars
439   * are discarded (as they are assumed to be too little context).
440   *
  * @param s String extracted from the page as its summary.
442   * @return A normalized string value.
443   */
getSummaryNormalized(String s)444   public static String getSummaryNormalized(String s) {
445     String str = "";
446     int max_chars = 250;
447     int min_chars = 50;
448     int marker = 0;
449     if (s.length() < min_chars) {
450       return str;
451     } else {
452       str = s.replaceAll("^\"|\"$", "");
453       str = str.replaceAll("\\s+", " ");
454       str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
455       str = escapeString(str);
456       BreakIterator bi = BreakIterator.getWordInstance();
457       bi.setText(str);
458       if (str.length() > max_chars) {
459         marker = bi.following(max_chars);
460       } else {
461         marker = bi.last();
462       }
463       str = str.substring(0, marker);
464       str = str.concat("\u2026" );
465     }
466     return str;
467   }
468 
escapeString(String s)469   public static String escapeString(String s) {
470     s = s.replaceAll("\"", "&quot;");
471     s = s.replaceAll("\'", "&#39;");
472     s = s.replaceAll("<", "&lt;");
473     s = s.replaceAll(">", "&gt;");
474     s = s.replaceAll("/", "&#47;");
475     return s;
476   }
477 
  //Disqualify img src urls that include these substrings.
  //Matching is a plain, case-sensitive substring test (see inList).
  public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
      "icon_play.png", "robot-tiny"};
481 
inList(String s, String[] list)482   public static boolean inList(String s, String[] list) {
483     for (String t : list) {
484       if (s.contains(t)) {
485         return true;
486       }
487     }
488     return false;
489   }
490 
491   /**
492   * Normalize an img src url by removing docRoot and leading
493   * slash for local image references. These are added later
494   * in js to support offline mode and keep path reference
495   * format consistent with hrefs.
496   *
497   * @param url Abs or rel url sourced from img src.
498   * @return Normalized url if qualified, else empty
499   */
getImageUrlNormalized(String url)500   public static String getImageUrlNormalized(String url) {
501     String absUrl = "";
502     // validate to avoid choosing using specific images
503     if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) {
504       absUrl = url.replace("{@docRoot}", "");
505       absUrl = absUrl.replaceFirst("^/(?!/)", "");
506     }
507     return absUrl;
508   }
509 
510   /**
  * Normalize an href url by removing docRoot and leading
  * slash for local page references. These are added later
513   * in js to support offline mode and keep path reference
514   * format consistent with hrefs.
515   *
516   * @param url Abs or rel page url sourced from href
517   * @return Normalized url, either abs or rel to root
518   */
getPageUrlNormalized(String url)519   public static String getPageUrlNormalized(String url) {
520     String absUrl = "";
521 
522     if ((url !=null) && (!url.equals(""))) {
523       absUrl = url.replace("{@docRoot}", "");
524       if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) {
525         absUrl = absUrl.replaceFirst("^en/", "");
526       }
527       absUrl = absUrl.replaceFirst("^/(?!/)", "");
528     }
529     return absUrl;
530   }
531 
532   /**
  * Given a metadata node, add it below the root node that matches its
  * lang, grouped there by type. If there is no root node that matches the
  * node's lang, create one and add the metadata node below it.
536   *
537   * @param gNode The node to attach to a root node or add as a new root node.
538   * @param rootList The current list of root nodes.
539   * @return The updated list of root nodes.
540   */
appendMetaNodeByLang(Node gNode, List<Node> rootList)541   public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) {
542 
543     String nodeLang = gNode.getLang();
544     boolean matched = false;
545     for (Node n : rootList) {
546       if (n.getLang().equals(nodeLang)) {  //find any matching lang node
547         appendMetaNodeByType(gNode,n.getChildren());
548         //n.getChildren().add(gNode);
549         matched = true;
550         break; // add to the first root node only
551       } // tag did not match
552     } // end rootnodes matching iterator
553     if (!matched) {
554       List<Node> mlangList = new ArrayList<Node>(); // list of file objects that have a given lang
555       //mlangList.add(gNode);
556       Node tnode = new Node.Builder().setChildren(mlangList).setLang(nodeLang).build();
557       rootList.add(tnode);
558       appendMetaNodeByType(gNode, mlangList);
559     }
560     return rootList;
561   }
562 
563   /**
564   * Given a metadata node, add it as a child of a root node based on its
565   * type. If there is no root node that matches the node's type, create one
566   * and add the metadata node as a child node.
567   *
568   * @param gNode The node to attach to a root node or add as a new root node.
569   * @param rootList The current list of root nodes.
570   * @return The updated list of root nodes.
571   */
appendMetaNodeByType(Node gNode, List<Node> rootList)572   public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {
573 
574     String nodeTags = gNode.getType();
575     boolean matched = false;
576     for (Node n : rootList) {
577       if (n.getType().equals(nodeTags)) {  //find any matching type node
578         n.getChildren().add(gNode);
579         matched = true;
580         break; // add to the first root node only
581       } // tag did not match
582     } // end rootnodes matching iterator
583     if (!matched) {
584       List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type
585       mtaglist.add(gNode);
586       Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build();
587       rootList.add(tnode);
588     }
589     return rootList;
590   }
591 
592   /**
593   * Given a metadata node, add it as a child of a root node based on its
594   * tag. If there is no root node matching the tag, create one for it
595   * and add the metadata node as a child node.
596   *
597   * @param gNode The node to attach to a root node or add as a new root node.
598   * @param rootTagNodesList The current list of root nodes.
599   * @return The updated list of root nodes.
600   */
appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList)601   public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {
602 
603     for (int iter = 0; iter < gNode.getChildren().size(); iter++) {
604       if (gNode.getChildren().get(iter).getTags() != null) {
605         List<String> nodeTags = gNode.getChildren().get(iter).getTags();
606         boolean matched = false;
607         for (String t : nodeTags) { //process each of the meta.tags
608           for (Node n : rootTagNodesList) {
609             if (n.getLabel().equals(t.toString())) {
610               n.getTags().add(String.valueOf(iter));
611               matched = true;
612               break; // add to the first root node only
613             } // tag did not match
614           } // end rootnodes matching iterator
615           if (!matched) {
616             List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag
617             mtaglist.add(String.valueOf(iter));
618             Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build();
619             rootTagNodesList.add(tnode);
620           }
621         }
622       }
623     }
624     return rootTagNodesList;
625   }
626 
627   /**
628   * Append the contents of jd_extras to jd_lists_unified for each language.
629   *
630   * @param extrasFilename The lang-specific extras file to append.
631   * @param unifiedFilename The lang-specific unified metadata file.
632   */
appendExtrasMetadata(String extrasFilename, String unifiedFilename)633   public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) {
634 
635     File f = new File(ClearPage.outputDir + "/" + extrasFilename);
636     if (f.exists() && !f.isDirectory()) {
637       ClearPage.copyFile(true, f, unifiedFilename, true);
638       try {
639         if (f.delete()) {
640           if (Doclava.META_DBG) System.out.println("    >>>>> Delete succeeded");
641         } else {
642           if (Doclava.META_DBG) System.out.println("    >>>>> Delete failed");
643         }
644       } catch (Exception e) {
645         if (Doclava.META_DBG) System.out.println("    >>>>> Exception: " + e + "\n");
646       }
647     }
648   }
649 
650   public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
651     public int compare (Node one, Node other) {
652       return one.getLabel().compareTo(other.getLabel());
653     }
654   };
655 
656   public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
657     public int compare (Node one, Node other) {
658       return one.getType().compareTo(other.getType());
659     }
660   };
661 
662     public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() {
663     public int compare (Node one, Node other) {
664       return one.getLang().compareTo(other.getLang());
665     }
666   };
667 
668   /**
669   * A node for storing page metadata. Use Builder.build() to instantiate.
670   */
671   public static class Node {
672 
673     private String mLabel; // holds page.title or similar identifier
674     private String mCategory; // subtabs, example 'training' 'guides'
675     private String mSummary; // Summary for card or similar use
676     private String mLink; //link href for item click
677     private String mGroup; // from sample.group in _index.jd
678     private List<String> mKeywords; // from page.tags
679     private List<String> mTags; // from meta.tags
680     private String mImage; // holds an href, fully qualified or relative to root
681     private List<Node> mChildren;
682     private String mLang;
683     private String mType; // design, develop, distribute, youtube, blog, etc
684     private String mTimestamp; // optional timestamp eg 1447452827
685 
Node(Builder builder)686     private Node(Builder builder) {
687       mLabel = builder.mLabel;
688       mCategory = builder.mCategory;
689       mSummary = builder.mSummary;
690       mLink = builder.mLink;
691       mGroup = builder.mGroup;
692       mKeywords = builder.mKeywords;
693       mTags = builder.mTags;
694       mImage = builder.mImage;
695       mChildren = builder.mChildren;
696       mLang = builder.mLang;
697       mType = builder.mType;
698       mTimestamp = builder.mTimestamp;
699     }
700 
701     private static class Builder {
702       private String mLabel, mCategory, mSummary, mLink, mGroup, mImage, mLang, mType, mTimestamp;
703       private List<String> mKeywords = null;
704       private List<String> mTags = null;
705       private List<Node> mChildren = null;
setLabel(String mLabel)706       public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;}
setCategory(String mCategory)707       public Builder setCategory(String mCategory) {
708         this.mCategory = mCategory; return this;
709       }
setSummary(String mSummary)710       public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;}
setLink(String mLink)711       public Builder setLink(String mLink) {this.mLink = mLink; return this;}
setGroup(String mGroup)712       public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;}
setKeywords(List<String> mKeywords)713       public Builder setKeywords(List<String> mKeywords) {
714         this.mKeywords = mKeywords; return this;
715       }
setTags(List<String> mTags)716       public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;}
setImage(String mImage)717       public Builder setImage(String mImage) {this.mImage = mImage; return this;}
setChildren(List<Node> mChildren)718       public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;}
setLang(String mLang)719       public Builder setLang(String mLang) {this.mLang = mLang; return this;}
setType(String mType)720       public Builder setType(String mType) {this.mType = mType; return this;}
setTimestamp(String mTimestamp)721       public Builder setTimestamp(String mTimestamp) {this.mTimestamp = mTimestamp; return this;}
build()722       public Node build() {return new Node(this);}
723     }
724 
725     /**
726     * Render a tree of metadata nodes organized by type.
727     * @param buf Output buffer to render to.
728     */
renderTypeResources(StringBuilder buf)729     void renderTypeResources(StringBuilder buf) {
730       List<Node> list = mChildren; //list of type rootnodes
731       if (list == null || list.size() == 0) {
732         buf.append("null");
733       } else {
734         final int n = list.size();
735         for (int i = 0; i < n; i++) {
736           buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = [");
737           list.get(i).renderTypes(buf); //render this type's children
738           buf.append("\n];\n\n");
739         }
740       }
741     }
742 
743     /**
744     * Render a tree of metadata nodes organized by lang.
745     * @param buf Output buffer to render to.
746     */
renderLangResources(StringBuilder buf, String langname)747     void renderLangResources(StringBuilder buf, String langname) {
748       List<Node> list = mChildren; //list of type rootnodes
749       if (list == null || list.size() == 0) {
750         buf.append("null");
751       } else {
752         final int n = list.size();
753         for (int i = 0; i < n; i++) {
754           buf.append("METADATA['" + langname + "']." + list.get(i).mType + " = [");
755           list.get(i).renderTypes(buf); //render this lang's children
756           buf.append("\n];\n\n");
757         }
758       }
759     }
760     /**
761     * Render all metadata nodes for a specific type.
762     * @param buf Output buffer to render to.
763     */
renderTypes(StringBuilder buf)764     void renderTypes(StringBuilder buf) {
765       List<Node> list = mChildren;
766       if (list == null || list.size() == 0) {
767         buf.append("nulltype");
768       } else {
769         final int n = list.size();
770         for (int i = 0; i < n; i++) {
771           buf.append("\n      {\n");
772           buf.append("        \"title\":\"");
773           renderStrWithUcs(buf, list.get(i).mLabel);
774           buf.append("\",\n" );
775           buf.append("        \"summary\":\"");
776           renderStrWithUcs(buf, list.get(i).mSummary);
777           buf.append("\",\n" );
778           buf.append("        \"url\":\"" + list.get(i).mLink + "\",\n" );
779           if (!"".equals(list.get(i).mImage)) {
780             buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
781           }
782           if (!"".equals(list.get(i).mGroup)) {
783             buf.append("        \"group\":\"");
784             renderStrWithUcs(buf, list.get(i).mGroup);
785             buf.append("\",\n" );
786           }
787           if (!"".equals(list.get(i).mCategory)) {
788             buf.append("        \"category\":\"" + list.get(i).mCategory + "\",\n" );
789           }
790           if ((list.get(i).mType != null) && (list.get(i).mType != "")) {
791             buf.append("        \"type\":\"" + list.get(i).mType + "\",\n");
792           }
793           list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
794           list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
795           if (!"".equals(list.get(i).mTimestamp)) {
796             buf.append("        \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n");
797           }
798           buf.append("        \"lang\":\"" + list.get(i).mLang + "\"" );
799           buf.append("\n      }");
800           if (i != n - 1) {
801             buf.append(", ");
802           }
803         }
804       }
805     }
806 
807     /**
808     * Build and render a list of tags associated with each type.
809     * @param buf Output buffer to render to.
810     */
renderTypesByTag(StringBuilder buf)811     void renderTypesByTag(StringBuilder buf) {
812       List<Node> list = mChildren; //list of rootnodes
813       if (list == null || list.size() == 0) {
814         buf.append("null");
815       } else {
816         final int n = list.size();
817         for (int i = 0; i < n; i++) {
818         buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {");
819         List<Node> mTagList = new ArrayList(); //list of rootnodes
820         mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList);
821         list.get(i).renderTagIndices(buf, mTagList);
822           buf.append("\n};\n\n");
823         }
824       }
825     }
826 
827     /**
828     * Render a list of tags associated with a type, including the
829     * tag's indices in the type array.
830     * @param buf Output buffer to render to.
831     * @param tagList Node tree of types to render.
832     */
renderTagIndices(StringBuilder buf, List<Node> tagList)833     void renderTagIndices(StringBuilder buf, List<Node> tagList) {
834       List<Node> list = tagList;
835       if (list == null || list.size() == 0) {
836         buf.append("");
837       } else {
838         final int n = list.size();
839         for (int i = 0; i < n; i++) {
840           buf.append("\n    " + list.get(i).mLabel + ":[");
841           renderArrayValue(buf, list.get(i).mTags);
842           buf.append("]");
843           if (i != n - 1) {
844             buf.append(", ");
845           }
846         }
847       }
848     }
849 
850     /**
851     * Render key:arrayvalue pair.
852     * @param buf Output buffer to render to.
853     * @param type The list value to render as an arrayvalue.
854     * @param key The key for the pair.
855     */
renderArrayType(StringBuilder buf, List<String> type, String key)856     void renderArrayType(StringBuilder buf, List<String> type, String key) {
857       buf.append("        \"" + key + "\": [");
858       renderArrayValue(buf, type);
859       buf.append("],\n");
860     }
861 
862     /**
863     * Render an array value to buf, with special handling of unicode characters.
864     * @param buf Output buffer to render to.
865     * @param type The list value to render as an arrayvalue.
866     */
renderArrayValue(StringBuilder buf, List<String> type)867     void renderArrayValue(StringBuilder buf, List<String> type) {
868       List<String> list = type;
869       if (list != null) {
870         final int n = list.size();
871         for (int i = 0; i < n; i++) {
872           String tagval = list.get(i).toString();
873           renderStrWithUcs(buf,tagval);
874           if (i != n - 1) {
875             buf.append(",");
876           }
877         }
878       }
879     }
880 
881     /**
882     * Render a string that can include ucs2 encoded characters.
883     * @param buf Output buffer to render to.
884     * @param chars String to append to buf with any necessary encoding
885     */
renderStrWithUcs(StringBuilder buf, String chars)886     void renderStrWithUcs(StringBuilder buf, String chars) {
887       String strval = chars;
888       final int L = strval.length();
889       for (int t = 0; t < L; t++) {
890         char c = strval.charAt(t);
891         if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE ) {
892           // we have a UTF-16 multi-byte character
893           int codePoint = strval.codePointAt(t);
894           int charSize = Character.charCount(codePoint);
895           t += charSize - 1;
896           buf.append(String.format("\\u%04x",codePoint));
897         } else if (c >= ' ' && c <= '~' && c != '\\') {
898           buf.append(c);
899         } else {
900           // we are encoding a two byte character
901           buf.append(String.format("\\u%04x", (int) c));
902         }
903       }
904     }
905 
getLabel()906     public String getLabel() {
907       return mLabel;
908     }
909 
setLabel(String label)910     public void setLabel(String label) {
911        mLabel = label;
912     }
913 
getCategory()914     public String getCategory() {
915       return mCategory;
916     }
917 
setCategory(String title)918     public void setCategory(String title) {
919        mCategory = title;
920     }
921 
getSummary()922     public String getSummary() {
923       return mSummary;
924     }
925 
setSummary(String summary)926     public void setSummary(String summary) {
927        mSummary = summary;
928     }
929 
getLink()930     public String getLink() {
931       return mLink;
932     }
933 
setLink(String ref)934     public void setLink(String ref) {
935        mLink = ref;
936     }
937 
getGroup()938     public String getGroup() {
939       return mGroup;
940     }
941 
setGroup(String group)942     public void setGroup(String group) {
943       mGroup = group;
944     }
945 
getTags()946     public List<String> getTags() {
947         return mTags;
948     }
949 
setTags(String tags)950     public void setTags(String tags) {
951       if ("".equals(tags)) {
952         mTags = null;
953       } else {
954         List<String> tagList = new ArrayList();
955         String[] tagParts = tags.split(",");
956 
957         for (String t : tagParts) {
958           tagList.add(t);
959         }
960         mTags = tagList;
961       }
962     }
963 
getKeywords()964     public List<String> getKeywords() {
965         return mKeywords;
966     }
967 
setKeywords(String keywords)968     public void setKeywords(String keywords) {
969       if ("".equals(keywords)) {
970         mKeywords = null;
971       } else {
972         List<String> keywordList = new ArrayList();
973         String[] keywordParts = keywords.split(",");
974 
975         for (String k : keywordParts) {
976           keywordList.add(k);
977         }
978         mKeywords = keywordList;
979       }
980     }
981 
getImage()982     public String getImage() {
983         return mImage;
984     }
985 
setImage(String ref)986     public void setImage(String ref) {
987        mImage = ref;
988     }
989 
getChildren()990     public List<Node> getChildren() {
991         return mChildren;
992     }
993 
setChildren(List<Node> node)994     public void setChildren(List<Node> node) {
995         mChildren = node;
996     }
997 
getLang()998     public String getLang() {
999       return mLang;
1000     }
1001 
setLang(String lang)1002     public void setLang(String lang) {
1003       mLang = lang;
1004     }
1005 
getType()1006     public String getType() {
1007       return mType;
1008     }
1009 
getTimestamp()1010     public String getTimestamp() {
1011       return mTimestamp;
1012     }
1013 
setType(String type)1014     public void setType(String type) {
1015       mType = type;
1016     }
1017 
setTimestamp(String timestamp)1018     public void setTimestamp(String timestamp) {
1019       mTimestamp = timestamp;
1020     }
1021   }
1022 }
1023